Dissertation Appendix C
Chapter 2: Regression-based random forest modeling of Inland Pacific Northwest (iPNW) drought-related wheat insurance loss, using time-lagged climate correlation matrix association
Appendix C documents Chapter 2 analyses related to agricultural commodity loss, at a county level, from 2001-2015, across the 24-county region of the Inland Pacific Northwest (iPNW). In this analysis, we use a time-lagged matrix approach to examine a range of combinations of climate and insurance loss for wheat due to drought.
Our analysis can be described in 5 specific steps:
Step 1. We subset insurance loss claims by wheat due to drought
Step 2. We aggregate climate data by month and county for the 24 county study area
Step 3. We construct a design matrix of all monthly climate combinations, per county and per climate variable.
Step 4. We select the monthly combination that has the highest correlation with wheat/drought claims, per county, for each year (2001 to 2015). Based on the output, we assemble an optimized climate/insurance loss dataset for all counties.
Step 5. We examine conditional relationships of climate to insurance loss using regression-based random forest analysis
In step 1, we perform a broad examination of wheat insurance loss due to drought for our 24 county iPNW study area, by calculating the ratio of wheat/drought acreage claims vs. all other wheat related damage cause claims, and comparing that data to individual climate variables. Example: For Whitman County, WA - we calculated the total amount of insurance claim acreage for wheat due to drought (for 2001 to 2015 combined), and divided that by the total amount of wheat insurance acreage for all other damage causes (excessive moisture, hail, frost, freeze, etc). We did this for each of the 24 counties.
We then calculate a summary value for each climate variable (plus aridity - which is PR / PET), for each county, from 2001 to 2015. We then constructed three scatterplots for comparison. Each point represents a county. For precipitation and potential evapotranspiration, we use the average annual total.
ipnw_climate_county_comparison <- function(var_commodity, var_damage) {
  # Compare 2001-2015 iPNW county-level insurance loss for one commodity /
  # damage-cause pair against summarized climate, producing a 1x3 panel of
  # scatterplots (annual precipitation, annual PET, and the PR/PET aridity
  # index, each vs. the share of claim acreage for the damage cause).
  #
  # Args:
  #   var_commodity: RMA commodity name, e.g. "WHEAT".
  #   var_damage:    RMA damage-cause name, e.g. "Drought".
  #
  # Returns: the gtable produced by gridExtra::grid.arrange (the panel is
  #   drawn as a side effect).
  library(plotrix)
  library(ggplot2)
  library(gridExtra)
  library(RCurl)

  #----load climate data
  # Climatology summaries are expected under /tmp/climatology; to fetch them:
  #URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climatology/climatology.zip"
  #destfile <- "/tmp/climatology.zip"
  #download.file(URL, destfile)
  #outDir<-"/tmp/climatology/"
  #unzip(destfile,exdir=outDir)

  years <- 2001:2015

  # Read one state's yearly climatology summaries, total the monthly rows
  # within each county-year, then average those yearly totals per county.
  load_state_climate <- function(state_name) {
    files <- file.path("/tmp/climatology",
                       paste0(state_name, "_", years, "_ipnw_summary"))
    combined <- do.call(rbind, lapply(files, read.csv, header = TRUE,
                                      strip.white = TRUE, sep = ","))
    yearly <- aggregate(. ~ countyfips + year, combined, sum)
    aggregate(. ~ countyfips, yearly, mean)
  }

  ID1 <- load_state_climate("Idaho")
  WA1 <- load_state_climate("Washington")
  OR1 <- load_state_climate("Oregon")

  countiesfips <- read.csv("/tmp/countiesfips.csv",
                           header = TRUE, strip.white = TRUE, sep = ",")
  colnames(countiesfips) <- c("countyfips", "county", "state")
  # zero-pad FIPS codes to five characters so they match the climate tables
  countiesfips$countyfips <- sprintf("%05d", countiesfips$countyfips)
  OR2 <- merge(countiesfips, OR1, by = "countyfips")
  ID2 <- merge(countiesfips, ID1, by = "countyfips")
  WA2 <- merge(countiesfips, WA1, by = "countyfips")

  #------add crop insurance data
  # RMA claim files are expected under /tmp/RMA_originaldata; to fetch them:
  #URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/RMA_originaldata/RMA_originaldata_txt.zip"
  #destfile <- "/tmp/RMA_originaldata_txt.zip"
  #download.file(URL, destfile)
  #outDir<-"/tmp/RMA_originaldata/"
  #unzip(destfile,exdir=outDir)

  # Yearly RMA files are pipe-delimited with no header row.
  read_rma <- function(yr) {
    read.csv(file.path("/tmp/RMA_originaldata", paste0(yr, ".txt")),
             sep = "|", header = FALSE, strip.white = TRUE)
  }

  # Only 2001-2015 are analyzed here (the 1989-2000 files lack the acres
  # column and were never used downstream in the original code).
  RMA <- do.call(rbind, lapply(years, read_rma))
  RMA <- RMA[, c(1, 3, 5, 7, 12, 13, 14, 15, 16)]
  colnames(RMA) <- c("year", "state", "county", "commodity", "damagecause",
                     "monthcode", "month", "acres", "loss")

  # subset to the three Pacific Northwest states and normalize text fields
  RMA_PNW <- subset(RMA, state %in% c("WA", "OR", "ID"))
  RMA_PNW$commodity <- trimws(as.character(RMA_PNW$commodity))
  RMA_PNW$county <- trimws(as.character(RMA_PNW$county))
  RMA_PNW$damagecause <- trimws(as.character(RMA_PNW$damagecause))

  # For one state: claim acreage for the commodity across ALL damage causes,
  # plus loss / acres / loss-per-acre for the single damage cause of
  # interest, merged with that state's county climate summary.
  state_loss_climate <- function(state_abbr, climate_df) {
    st <- subset(RMA_PNW, state == state_abbr)
    commodity_all <- subset(st, commodity == var_commodity)
    loss_all <- aggregate(commodity_all$acres,
                          by = list(commodity_all$county, commodity_all$state),
                          FUN = "sum")
    colnames(loss_all) <- c("county", "state", "all_damagecause_acres")
    one_cause <- subset(st, commodity == var_commodity &
                            damagecause == var_damage)
    loss_sum <- aggregate(one_cause$loss,
                          by = list(one_cause$county, one_cause$state),
                          FUN = "sum")
    acres_sum <- aggregate(one_cause$acres,
                           by = list(one_cause$county, one_cause$state),
                           FUN = "sum")
    colnames(loss_sum) <- c("county", "state", "loss")
    loss_sum$acres <- acres_sum[, 3]
    loss_sum$lossperacre <- loss_sum$loss / loss_sum$acres
    with_climate <- merge(loss_sum, climate_df, by = c("county", "state"))
    merge(loss_all, with_climate, by = c("county", "state"))
  }

  #-merge all three states
  iPNW_loss_climate <- rbind(state_loss_climate("OR", OR2),
                             state_loss_climate("ID", ID2),
                             state_loss_climate("WA", WA2))

  #--subset to only iPNW 24 counties
  # Franklin is handled separately because only Franklin, WA belongs to the
  # study area (a county named Franklin also exists in another state).
  Franklin <- subset(iPNW_loss_climate, county == "Franklin" & state == "WA")
  ipnw_counties <- c("Benewah", "Latah", "Nez Perce", "Lewis", "Idaho",
                     "Wasco", "Sherman", "Gilliam", "Morrow", "Umatilla",
                     "Union", "Wallowa", "Douglas", "Walla Walla", "Benton",
                     "Columbia", "Asotin", "Garfield", "Grant", "Whitman",
                     "Spokane", "Lincoln", "Adams")
  iPNW_loss_climate <- subset(iPNW_loss_climate, county %in% ipnw_counties)
  iPNW_loss_climate <- rbind(iPNW_loss_climate, Franklin)

  # share of the commodity's claim acreage attributable to this damage cause
  iPNW_loss_climate$pct_acreage <-
    iPNW_loss_climate$acres / iPNW_loss_climate$all_damagecause_acres

  # layout settings retained from an earlier base-graphics version of this
  # figure; they do not affect the ggplot/grid output below
  par(mfrow = c(1, 3), mar = c(11.8, 6.1, 4, 2.1), family = "serif",
      mgp = c(4, 1, 0), las = 0)
  layout(matrix(c(1, 2, 3), 1, 3, byrow = TRUE))

  # monthly climate means are scaled by 12 to approximate annual totals
  plot_data <- iPNW_loss_climate
  plot_data$pr_year <- plot_data$pr * 12
  plot_data$pet_year <- plot_data$pet * 12
  plot_data$prpet <- plot_data$pr_year / plot_data$pet_year

  serif_theme <- theme(text = element_text(size = 16, family = "serif"))

  pr <- ggplot(plot_data, aes(x = pr_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth(method = "loess") +
    xlab("Annual Precipitation (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    serif_theme
  pet <- ggplot(plot_data, aes(x = pet_year, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Annual PET (mm)") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    serif_theme
  prpet <- ggplot(plot_data, aes(x = prpet, y = pct_acreage)) +
    geom_point() +
    geom_smooth() +
    xlab("Aridity Index") +
    ylab("Percentage Drought Wheat Acreage Loss") +
    serif_theme

  grid.arrange(pr, pet, prpet, nrow = 1)
}
ipnw_climate_county_comparison("WHEAT", "Drought")
For the three examined climate variables (potential evapotranspiration, precipitation, maximum temperature), a design matrix was developed for each of the 24 counties that are part of the defined iPNW study area. For each county, an absolute correlation was calculated for each monthly combination for each climate variable to the total wheat insurance loss due to drought. The result is a design matrix map that identifies the monthly combination with the highest correlation. For example, for Whitman county, WA, for maximum temperature - the monthly combination with highest correlation between max temperature and wheat insurance loss due to drought was May/June/July (denoted as July 2).
# Step 4 configuration: build the climate/loss correlation design matrix
# for one state/county/commodity/damage-cause combination.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
# climate variable examined in this run: maximum temperature
climate_var <- "tmmx"
# response column name in the correlation summary files
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
# Enumerate the 144 (month, span-length) combinations that index the
# 12 x 12 design matrix: "jan1", "jan2", ..., "dec12". expand.grid varies
# its first argument fastest, matching rep(month, each = 12) paired with
# rep(1:12, times = 12).
monthnumlist <- expand.grid(monthcode = 1:12,
                            month = tolower(month.abb),
                            stringsAsFactors = FALSE)[, c("month", "monthcode")]
# combined label, e.g. "jan1" = January span of length 1
monthnumlist$combined <- paste0(monthnumlist$month, monthnumlist$monthcode)
climmonth <- monthnumlist$combined
# empty 12 x 12 design matrix placeholder
designmatrix <- matrix(NA, ncol = 12, nrow = 12)
# 144-slot accumulator for loss/climate correlations, filled by the loop below
dmvector <- as.data.frame(rep(NA, times = 144))
cl <- 0
#URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_matrices/climate_matrices.zip"
#destfile <- "/tmp/climate_matrices.zip"
#download.file(URL, destfile)
#outDir<-"/tmp/climate_matrices"
#unzip(destfile,exdir=outDir)
#URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_correlation_summaries/climate_correlations_summaries.zip"
#destfile <- "/tmp/climate_correlations_summaries.zip"
#download.file(URL, destfile)
#outDir<-"/tmp/climate_correlations_summaries"
#unzip(destfile,exdir=outDir)
# For each of the 144 month-combination labels ("jan1" .. "dec12"), read
# that combination's climate matrix plus the loss summary for the
# configured state/county/commodity/damage cause, and store the
# correlation between the climate variable and the predictor in dmvector.
for (ppp in climmonth) {
cl = cl +1
# climate matrix for this month combination
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# column 1: the chosen climate variable; column 2 of file1: year
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
# yearly loss summary for the same state/county/commodity/damage cause
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): this assigns colnames on a COPY of climatevar[3] and so
# has no effect; the working rename happens below on kor2
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
# drop years with no recorded damage cause after the join
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# correlation of the raw climate values (col 1) with the predictor (col 9)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into the trimmed design-matrix layout.
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
# 12 x 12, filled row-major (one row per starting month), then flipped so
# January ends up on the bottom row for plotting
dmvector2 <- matrix(dmvector$correlations, 12, 12, TRUE)
dmvector2 <- dmvector2[nrow(dmvector2):1, ]
# keep only the 9 rows corresponding to Jan-Sep starting months
dmvector3 <- dmvector2[4:12, ]
# blank the cells whose month span would run past December: row (10 - r)
# loses columns (3 + r):12, reproducing the original hand-written masks
# (row 9 cols 4:12, row 8 cols 5:12, ..., row 1 col 12)
for (r in 1:9) {
  dmvector3[10 - r, (3 + r):12] <- NA
}
# flatten rows bottom-to-top into the 108-cell lookup vector used by the
# which.min/which.max selection that follows
dmvector <- data.frame(
  dmvector = unlist(lapply(9:1, function(i) dmvector3[i, ]))
)
# Select the design-matrix cell with the strongest drought signal.
# For moisture-type variables (precipitation, relative humidity, fuel
# moisture, PDSI) drought corresponds to LOW values, so the most NEGATIVE
# loss correlation is chosen; for temperature-type variables (tmmn, tmmx,
# pet) drought corresponds to HIGH values, so the most POSITIVE
# correlation is chosen. (Replaces a nine-level nested if/else; also
# fails loudly on an unrecognized climate_var instead of leaving dmv
# undefined.)
min_cor_vars <- c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")
max_cor_vars <- c("tmmn", "pet", "tmmx")
correlations <- dmvector[1:108, 1]
if (climate_var %in% min_cor_vars) {
  dmv <- which.min(correlations)
} else if (climate_var %in% max_cor_vars) {
  dmv <- which.max(correlations)
} else {
  stop("unsupported climate_var: ", climate_var)
}
#dmv <- which.max(abs( dmvector[1:108,1]) )
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
#dmvector1a <- max(dmvector$correlations)
#dmvector1b <- data.frame(which=dmvector, correlations=dmvector[dmvector1a, ])
# map the selected flat index back to its month / span-length label
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# (col, row) coordinates of the selected max-correlation cell in the grid
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
# row labels: only Jan-Sep starting months remain in the trimmed matrix
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
# NOTE(review): output path differs from the /tmp paths used elsewhere --
# confirm this directory exists on the machine running the script
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# rounded copy intended as cell labels for the commented-out annotated heatmap
dmvector3a <- round(dmvector3, digits = 2)
# global assignment: highlight-marker coordinates for the commented-out
# heatmap.2 call at the bottom of this section
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
# re-mask cells past December (round() preserves the NAs already set on
# dmvector3, so these appear redundant -- verify before removing)
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# final palette; overrides the red/yellow/green ramp defined above
my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red", "darkred"))(n = 32)
# draw the design-matrix heatmap; the title reports the best-correlated cell
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, symbreaks = FALSE, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
# Step 4 repeated for a second climate variable: potential
# evapotranspiration (pet), again for Whitman County, WA.
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pet"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
for (ppp in climmonth) {
cl = cl +1
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into the month-by-lag design matrix, mask the
# invalid upper-right triangle, re-linearize the valid 108 cells, and select
# the strongest cell for this climate variable.
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
# Fill the 12x12 matrix by row, then flip it vertically.
dmvector2 <- matrix(dmvector$correlations, nrow = 12, ncol = 12, byrow = TRUE)
dmvector2 <- dmvector2[rev(seq_len(nrow(dmvector2))), ]
# Keep rows 4-12: the nine month rows actually displayed.
dmvector3 <- dmvector2[4:12, ]
# Row r only has valid lag combinations in columns 1:(12 - r); blank the rest.
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten the rows bottom-up (row 9 first) into a single 108-value column.
dmvector <- unlist(lapply(9:1, function(r) dmvector3[r, ]))
dmvector <- data.frame(dmvector)
# Variables whose drought signal is a negative correlation are scanned for the
# minimum; temperature/PET variables for the maximum. Any other climate_var
# value leaves dmv unchanged, exactly as the original if/else cascade did.
if (climate_var %in% c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% c("tmmn", "pet", "tmmx")) {
  dmv <- which.max(dmvector[1:108, 1])
}
# Wrap the winning linear index as a one-column data frame named "row".
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
#dmvector1a <- max(dmvector$correlations)
#dmvector1b <- data.frame(which=dmvector, correlations=dmvector[dmvector1a, ])
# Attach a 1..144 row index to the month/lag lookup so the selected cell can
# be mapped back to its month label and grid position.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates (col, row) of the winning cell in the 12x9 display.
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded copy for optional cell annotations; mask its invalid triangle too.
dmvector3a <- round(dmvector3, digits = 2)
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# Light-to-dark sequential palette used by the heatmap (replaces the one above).
my_palette <- colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red", "darkred"))(n = 32)
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, symbreaks = FALSE, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=colorRampPalette(c("lightyellow", "yellow", "orange", "darkorange", "red")), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
# --- Same pipeline as above, now for precipitation (pr) vs. cube-root wheat/
# --- drought loss, Whitman County WA ---
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pr"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
# NOTE(review): designmatrix is allocated but never filled; results go into dmvector.
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
# One iteration per month/lag combination: read the per-cell climate CSV and
# the loss summary, join on year, and store their correlation.
for (ppp in climmonth) {
cl = cl +1
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# Column 1: the selected climate variable; file1[2] must carry the "year"
# column used for the join below.
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): renaming a `[`-subset copy has no lasting effect here; the
# effective rename happens on kor2 a few lines down.
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# Correlation of the raw climate column (1) against the loss column (9).
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into the month-by-lag design matrix, mask the
# invalid upper-right triangle, re-linearize the valid 108 cells, and select
# the strongest cell for this climate variable.
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
# Fill the 12x12 matrix by row, then flip it vertically.
dmvector2 <- matrix(dmvector$correlations, nrow = 12, ncol = 12, byrow = TRUE)
dmvector2 <- dmvector2[rev(seq_len(nrow(dmvector2))), ]
# Keep rows 4-12: the nine month rows actually displayed.
dmvector3 <- dmvector2[4:12, ]
# Row r only has valid lag combinations in columns 1:(12 - r); blank the rest.
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten the rows bottom-up (row 9 first) into a single 108-value column.
dmvector <- unlist(lapply(9:1, function(r) dmvector3[r, ]))
dmvector <- data.frame(dmvector)
# Variables whose drought signal is a negative correlation are scanned for the
# minimum; temperature/PET variables for the maximum. Any other climate_var
# value leaves dmv unchanged, exactly as the original if/else cascade did.
if (climate_var %in% c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% c("tmmn", "pet", "tmmx")) {
  dmv <- which.max(dmvector[1:108, 1])
}
# Wrap the winning linear index as a one-column data frame named "row".
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
#dmvector1a <- max(dmvector$correlations)
#dmvector1b <- data.frame(which=dmvector, correlations=dmvector[dmvector1a, ])
# Attach a 1..144 row index to the month/lag lookup so the selected cell can
# be mapped back to its month label and grid position.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates (col, row) of the winning cell in the 12x9 display.
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded copy for optional cell annotations; mask its invalid triangle too.
dmvector3a <- round(dmvector3, digits = 2)
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# Reversed (dark-to-light) palette: for pr, the strongest drought signal is the
# most negative correlation, so the color ramp direction differs from pet.
my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
# --- Same pipeline as above, now for the Palmer Drought Severity Index (pdsi)
# --- vs. cube-root wheat/drought loss, Whitman County WA ---
state_var <- "WA"
county_var <- "Whitman"
commodity_var <- "WHEAT"
damage_var <- "Drought"
climate_var <- "pdsi"
predictor_var <- "cube_root_loss"
library(gplots)
library(plyr)
library(dplyr)
#library(plotly)
#packageVersion('plotly')
#response may be: loss_month, loss_per_acres, loss_year, total_acres_harvested, and total_acres_loss
#monthlist is jan-dec, each repeated 12 times
monthlist <- as.data.frame(rep(tolower(month.abb), each = 12))
#numlist is 12 months, repeated 12 times
numlist <- as.data.frame(rep((1:12), times = 12))
#monthnumlist puts month names and 1-12 together, which results in a vector of 144 that is the matrix of 12 by 12
monthnumlist <- as.data.frame(cbind(monthlist, numlist))
#renaming columns
colnames(monthnumlist) <- c("month", "monthcode")
#put the monthlist all together in one vector
monthnumlist$combined <- paste(monthnumlist$month, monthnumlist$monthcode, sep="")
#rename the vector
climmonth <- monthnumlist$combined
# NOTE(review): designmatrix is allocated but never filled; results go into dmvector.
designmatrix <- matrix(NA, ncol=12, nrow=12)
#create an empty 144 vector to fill up with correlations between loss and climate
dmvector <- as.data.frame(rep(NA, times=144))
cl = 0
# One iteration per month/lag combination: read the per-cell climate CSV and
# the loss summary, join on year, and store their correlation.
for (ppp in climmonth) {
cl = cl +1
setwd("/tmp/climate_matrices")
file1 <- read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", ppp, ".csv", sep=""))
# Column 1: the selected climate variable; file1[2] must carry the "year"
# column used for the join below.
climatevar <- as.data.frame(cbind((file1[climate_var]), file1[2]))
setwd("/tmp/climate_correlations_summaries")
file2 <- as.data.frame(read.csv(paste(state_var, "_", county_var, "_", commodity_var, "_", damage_var, "_", predictor_var, ".csv", sep="")))
file2 <- subset(file2, state == state_var)
file2 <- subset(file2, county == county_var)
colnames(file2) <- c("X", "year", "damagecause", "county", "state", "commodity", predictor_var )
climatevar$zscore <- scale(climatevar[1], center = TRUE, scale = TRUE)
# NOTE(review): renaming a `[`-subset copy has no lasting effect here; the
# effective rename happens on kor2 a few lines down.
colnames(climatevar[3]) <- "zscore"
kor <- join(climatevar, file2, by = "year")
kor2 <- subset(kor, damagecause != "NA")
colnames(kor2)[3] <- "zscore"
# kor2[9] <- as.numeric(kor2$zscore)
# Correlation of the raw climate column (1) against the loss column (9).
kor3 <- cor(kor2[1], kor2[9])
#insert kor3 into designmatrix iteratively
dmvector[cl,] <- kor3
}
# Reshape the 144 correlations into the month-by-lag design matrix, mask the
# invalid upper-right triangle, re-linearize the valid 108 cells, and select
# the strongest cell for this climate variable.
dmvector <- as.data.frame(dmvector)
colnames(dmvector) <- "correlations"
# Fill the 12x12 matrix by row, then flip it vertically.
dmvector2 <- matrix(dmvector$correlations, nrow = 12, ncol = 12, byrow = TRUE)
dmvector2 <- dmvector2[rev(seq_len(nrow(dmvector2))), ]
# Keep rows 4-12: the nine month rows actually displayed.
dmvector3 <- dmvector2[4:12, ]
# Row r only has valid lag combinations in columns 1:(12 - r); blank the rest.
for (r in 1:9) {
  dmvector3[r, (13 - r):12] <- NA
}
# Flatten the rows bottom-up (row 9 first) into a single 108-value column.
dmvector <- unlist(lapply(9:1, function(r) dmvector3[r, ]))
dmvector <- data.frame(dmvector)
# Variables whose drought signal is a negative correlation are scanned for the
# minimum; temperature/PET variables for the maximum. Any other climate_var
# value leaves dmv unchanged, exactly as the original if/else cascade did.
if (climate_var %in% c("pr", "rmin", "rmax", "fm100", "fm1000", "pdsi")) {
  dmv <- which.min(dmvector[1:108, 1])
} else if (climate_var %in% c("tmmn", "pet", "tmmx")) {
  dmv <- which.max(dmvector[1:108, 1])
}
# Wrap the winning linear index as a one-column data frame named "row".
dmv <- as.data.frame(dmv)
colnames(dmv)[1] <- "row"
#dmvector1a <- max(dmvector$correlations)
#dmvector1b <- data.frame(which=dmvector, correlations=dmvector[dmvector1a, ])
# Attach a 1..144 row index to the month/lag lookup so the selected cell can
# be mapped back to its month label and grid position.
monthnumlist2 <- cbind(as.data.frame(c(1:144)), monthnumlist)
colnames(monthnumlist2)[1] <- "row"
monthnumlist3 <- subset(monthnumlist2, row == dmv$row)
#makeRects <- function(tfMat){
require(utils)
# Grid coordinates (col, row) of the winning cell in the 12x9 display.
cAbove <- expand.grid(1:12, 1:9)[monthnumlist3$row,]
monthzzz <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep")
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n = 108)
#setwd("/dmine/data/USDA/agmesh-scenarios/Allstates/climatematrix_pngs")
# png(filename=paste(state1, "_", county1, "_", damage1, "_", climate_variable12, "_designmatrix.png", sep=""))
lblcol <- c(9:1)
# Rounded copy for optional cell annotations; mask its invalid triangle too.
dmvector3a <- round(dmvector3, digits = 2)
newone <<- as.data.frame(expand.grid(1:12, 1:9)[monthnumlist3$row,])
dmvector3a[9,4:12] <- NA
dmvector3a[8,5:12] <- NA
dmvector3a[7,6:12] <- NA
dmvector3a[6,7:12] <- NA
dmvector3a[5,8:12] <- NA
dmvector3a[4,9:12] <- NA
dmvector3a[3,10:12] <- NA
dmvector3a[2,11:12] <- NA
dmvector3a[1,12:12] <- NA
# Reversed (dark-to-light) palette: for pdsi the strongest drought signal is
# the most negative correlation, so the ramp direction differs from pet.
my_palette <- colorRampPalette(c("darkred", "red", "darkorange", "orange", "yellow", "lightyellow"))(n = 16)
nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, cexCol = 2, cexRow = 2, colsep=1:ncol(dmvector3), rowsep=1:nrow(dmvector3), col=my_palette, scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""))
# nba_heatmap <- heatmap.2(dmvector3, Rowv=NA, Colv=NA, col=topo.colors(16), scale="none", dendrogram = "none", trace="none", labRow = rev(monthzzz), tracecol = "black", key = TRUE, cellnote = dmvector3a, notecol = "black", notecex = 1.5, key.ylab = "", key.xlab = "loss correlation range", key.title = "", main=paste(climate_var, " vs ", predictor_var, "\n", county_var, " County ", state_var, " ", damage_var, "\n", "MAX Correlation in cell: ", monthnumlist3$combined, sep=""), add.expr=points(newone$Var1,newone$Var2, pch=15, cex = 5.5 , col=rgb(199/255, 0, 0, 0.8)))
In step 3, we generate maps of our climate-lagged correlations, for each climate variable. Each county is labeled with the optimum monthly period that has the highest correlation for that climate variable and wheat/drought insurance loss (July 2 = two months previous to July, or May/June/July), along with the correlation value. Each correlation value is absolute (correlation coefficient - R)
# --- Step 3 mapping: load the per-county correlation-matrix CSVs for the
# --- chosen climate variable and predictor ---
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
climate_var <- "pr"
predictor_var <- "cube_root_loss"
# Map labels use the matrix cell at row `monthend` (month), column `monthnumber` (lag).
monthend <- "jul"
monthnumber <- 2
library(RColorBrewer)
library(dplyr)
#URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_correlations/climate_correlations.zip"
#destfile <- "/tmp/climate_correlations.zip"
#download.file(URL, destfile)
#outDir<-"/tmp/climate_correlations"
#unzip(destfile,exdir=outDir)
setwd("/tmp/climate_correlations/")
# "loss" is stored on disk under its full name; other predictors match as-is.
if (predictor_var == "loss") {
predictor <- "crop_commodity_loss"
}else {
predictor <- predictor_var
}
files <- list.files(pattern = predictor)
# Split file names on "_" into metadata fields; field 5 is the climate variable.
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Reassemble the file names for the rows that survived the climate filter.
data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Mask the invalid upper-right triangle of every county correlation table:
# row r only holds valid month/lag combinations in columns 1:(r + 2), so
# columns (r + 3):12 are blanked. Using seq_along(tables) instead of the
# hard-coded 1:26 fixes the latent defect where a different number of county
# files would either error or be silently skipped; behavior is identical when
# exactly 26 tables are present.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Month index (1-12) of the chosen ending-month abbreviation.
monthly <- match(monthend, tolower(month.abb))
# For each of the 26 county tables, find the cell holding the strongest
# correlation. pr/rmin/pdsi express drought through negative correlations and
# are scanned for the minimum; the remaining recognized variables for the
# maximum. An unrecognized climate_var leaves bestcounty untouched, exactly
# like the original if/else cascade.
if (climate_var %in% c("pr", "rmin", "pdsi", "rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")) {
  use_min <- climate_var %in% c("pr", "rmin", "pdsi")
  # Columns: 1 = row index (month), 2 = column index (lag), 3 = correlation.
  bestcounty <- matrix(NA, nrow = 26, ncol = 3)
  for (i in 1:26) {
    extreme <- if (use_min) {
      min(tables[[i]], na.rm = TRUE)
    } else {
      max(tables[[i]], na.rm = TRUE)
    }
    cell <- which(tables[[i]] == extreme, arr.ind = TRUE)
    bestcounty[i, 1] <- cell[1, 1]
    bestcounty[i, 2] <- cell[1, 2]
    bestcounty[i, 3] <- extreme
  }
}
# Convert the winning row index to a month name. NOTE(review): assigning
# character into column 1 coerces the whole matrix (incl. the correlation
# column) to character; it is converted back to numeric further below.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Pull the single cell (monthend row, monthnumber column) from every county table.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate, and the extracted correlation.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
counties <- readShapePoly('/tmp/threestate_palouse.shp',
proj4string=CRS
("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop two counties outside the 24-county study area.
counties <- counties[counties$NAME != "Kootenai",]
counties <- counties[counties$NAME != "Clearwater",]
counties2 <- merge(counties, table5, by = "NAME" )
#--new
counties3 <- merge(counties2, bestcounty2, by = "NAME")
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep="")
#--new
#colorbrew <- list(color = brewer.pal(26, c("green", "blue", "yellow")))
#my_palette <- colorRampPalette(c("darkred", "lightyellow"))(n = 26)
# Standard factor-to-numeric conversion for the character/factor CORRELATION column.
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
#pal <- colorNumeric(rev(my_palette), na.color = "#ffffff", domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
# NOTE(review): eval(parse(...)) here is equivalent to counties3$CORRELATION.
pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11), na.color = "#ffffff",
domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11), na.color = "#ffffff",
domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
#--
# Build a leaflet legend label formatter. With reverse_order = TRUE, return a
# formatter function that yields the legend cut points sorted in descending
# order; otherwise defer to leaflet's stock labelFormat(), forwarding `...`.
myLabelFormat <- function(..., reverse_order = FALSE) {
  if (!reverse_order) {
    return(labelFormat(...))
  }
  function(type = "numeric", cuts) {
    sort(cuts, decreasing = TRUE)
  }
}
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
# Bounding box of the merged county polygons, used to fit the leaflet view.
exte <- extent(counties3)
library(htmltools)
# CSS for an (optional, currently unused) centered map title control.
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
# County centroids for placing the text labels.
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
# Two-line HTML label per county: rounded correlation over the month/lag combo.
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
# Assemble the choropleth: pal fills polygons, pal2 drives the legend ramp,
# and the legend labels are sorted in decreasing order.
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal2, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
map
# --- Step 3 mapping, repeated for potential evapotranspiration (pet) ---
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
climate_var <- "pet"
predictor_var <- "cube_root_loss"
# Map labels use the matrix cell at row `monthend` (month), column `monthnumber` (lag).
monthend <- "jul"
monthnumber <- 2
library(RColorBrewer)
library(dplyr)
setwd("/tmp/climate_correlations/")
# "loss" is stored on disk under its full name; other predictors match as-is.
if (predictor_var == "loss") {
predictor <- "crop_commodity_loss"
}else {
predictor <- predictor_var
}
files <- list.files(pattern = predictor)
# Split file names on "_" into metadata fields; field 5 is the climate variable.
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
# Reassemble the file names for the rows that survived the climate filter.
data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
# Mask the invalid upper-right triangle of every county correlation table:
# row r only holds valid month/lag combinations in columns 1:(r + 2), so
# columns (r + 3):12 are blanked. Using seq_along(tables) instead of the
# hard-coded 1:26 fixes the latent defect where a different number of county
# files would either error or be silently skipped; behavior is identical when
# exactly 26 tables are present.
for (i in seq_along(tables)) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
# Month index (1-12) of the chosen ending-month abbreviation.
monthly <- match(monthend, tolower(month.abb))
# For each of the 26 county tables, find the cell holding the strongest
# correlation. pr/rmin/pdsi express drought through negative correlations and
# are scanned for the minimum; the remaining recognized variables for the
# maximum. An unrecognized climate_var leaves bestcounty untouched, exactly
# like the original if/else cascade.
if (climate_var %in% c("pr", "rmin", "pdsi", "rmax", "tmmx", "tmin", "fm100", "fm1000", "pet")) {
  use_min <- climate_var %in% c("pr", "rmin", "pdsi")
  # Columns: 1 = row index (month), 2 = column index (lag), 3 = correlation.
  bestcounty <- matrix(NA, nrow = 26, ncol = 3)
  for (i in 1:26) {
    extreme <- if (use_min) {
      min(tables[[i]], na.rm = TRUE)
    } else {
      max(tables[[i]], na.rm = TRUE)
    }
    cell <- which(tables[[i]] == extreme, arr.ind = TRUE)
    bestcounty[i, 1] <- cell[1, 1]
    bestcounty[i, 2] <- cell[1, 2]
    bestcounty[i, 3] <- extreme
  }
}
# Convert the winning row index to a month name. NOTE(review): assigning
# character into column 1 coerces the whole matrix (incl. the correlation
# column) to character; it is converted back to numeric further below.
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
# Pull the single cell (monthend row, monthnumber column) from every county table.
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
# Keep county, commodity, damage, climate, and the extracted correlation.
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
counties <- readShapePoly('/tmp/threestate_palouse.shp',
proj4string=CRS
("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
# Drop two counties outside the 24-county study area.
counties <- counties[counties$NAME != "Kootenai",]
counties <- counties[counties$NAME != "Clearwater",]
counties2 <- merge(counties, table5, by = "NAME" )
#--new
counties3 <- merge(counties2, bestcounty2, by = "NAME")
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep="")
#--new
#colorbrew <- list(color = brewer.pal(26, c("green", "blue", "yellow")))
#my_palette <- colorRampPalette(c("darkred", "lightyellow"))(n = 26)
# Standard factor-to-numeric conversion for the character/factor CORRELATION column.
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
#pal <- colorNumeric(rev(my_palette), na.color = "#ffffff", domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
# NOTE(review): eval(parse(...)) here is equivalent to counties3$CORRELATION.
pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11), na.color = "#ffffff",
domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
#--
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
exte <- extent(counties3)
library(htmltools)
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright")
map
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
#--Run parameters: maximum temperature (tmmx) vs. cube-root wheat/drought loss,
#--evaluated at the July / 2-month-lag cell of each county's correlation matrix
climate_var <- "tmmx"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
library(RColorBrewer)
library(dplyr)
setwd("/tmp/climate_correlations/")
#--Map the "loss" shorthand onto the token used in the on-disk file names
if (predictor_var == "loss") {
predictor <- "crop_commodity_loss"
}else {
predictor <- predictor_var
}
#--Find the per-county correlation-matrix CSVs and split their underscore-
#--delimited names into metadata columns, keeping only the chosen climate variable
files <- list.files(pattern = predictor)
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
#--Rebuild the file names from the parsed tokens and read each county's matrix
data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
#--Blank out the invalid upper-triangle of each county's correlation matrix:
#--row r (starting month) cannot pair with lag columns beyond the data window,
#--so columns (r + 3):12 of row r are NA (row 1 -> cols 4:12, ..., row 9 -> col 12).
#--Replaces nine hand-written assignments with the equivalent loop.
for (i in 1:26) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
#--Row index (month) of the user-selected ending month within each matrix
monthly <- match(monthend, tolower(month.abb))
#--For each county, locate the matrix cell holding the strongest expected
#--correlation with loss. Variables negatively associated with drought loss
#--(precipitation, minimum relative humidity, PDSI) use the minimum correlation;
#--all other variables (rmax, tmmx, tmin, fm100, fm1000, pet, and any future
#--additions) use the maximum. This replaces nine duplicated if/else branches
#--that differed only in the min/max call.
extreme_fun <- if (climate_var %in% c("pr", "rmin", "pdsi")) min else max
bestcounty <- matrix(NA, nrow = 26, ncol = 3)
for (i in 1:26) {
  best_val <- extreme_fun(tables[[i]], na.rm = TRUE)
  cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
  bestcounty[i, 1] <- cell[1, 1]  # row: starting month
  bestcounty[i, 2] <- cell[1, 2]  # column: ending-month / lag index
  bestcounty[i, 3] <- best_val    # the correlation value itself
}
#--Convert the best-correlation row index (1-12) to a lowercase month abbreviation
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
#--Attach county names parsed from the file names; columns become:
#--start month, ending-month/lag index, and the correlation value itself
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
#--Pull the single correlation at the dashboard-selected cell
#--(row = monthend, column = monthnumber) for every county
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
#--Keep county, commodity, damage cause, climate variable, and the selected correlation
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
#--Study-area county polygons (WGS84)
#--NOTE(review): maptools::readShapePoly is deprecated; sf::st_read is the modern replacement
counties <- readShapePoly('/tmp/threestate_palouse.shp',
proj4string=CRS
("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
#--Drop the two counties outside the 24-county iPNW study area
counties <- counties[counties$NAME != "Kootenai",]
counties <- counties[counties$NAME != "Clearwater",]
counties2 <- merge(counties, table5, by = "NAME" )
#--new
counties3 <- merge(counties2, bestcounty2, by = "NAME")
#--Label such as "jul2": best starting month plus ending-month index
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep="")
#--new
#colorbrew <- list(color = brewer.pal(26, c("green", "blue", "yellow")))
#my_palette <- colorRampPalette(c("darkred", "lightyellow"))(n = 26)
#--CORRELATION comes out of the merges as a factor; convert via its levels
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
#pal <- colorNumeric(rev(my_palette), na.color = "#ffffff", domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
#--Sequential palette: light yellow (weak) through red (strong correlation)
pal <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11), na.color = "#ffffff",
domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
#--
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
#--Bounding box used to fit the leaflet viewport
exte <- extent(counties3)
library(htmltools)
#--CSS for a centered, semi-transparent map title control
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
#--County centroids for label placement
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
#--Two-line HTML label per county: correlation value over the month combo
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
#--Choropleth of the best correlation per county, with centroid labels and legend
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright")
map
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
#--Run parameters: Palmer Drought Severity Index (pdsi) vs. cube-root
#--wheat/drought loss, at the July / 2-month-lag cell of each county's matrix
climate_var <- "pdsi"
predictor_var <- "cube_root_loss"
monthend <- "jul"
monthnumber <- 2
library(RColorBrewer)
library(dplyr)
setwd("/tmp/climate_correlations/")
#--Map the "loss" shorthand onto the token used in the on-disk file names
if (predictor_var == "loss") {
predictor <- "crop_commodity_loss"
}else {
predictor <- predictor_var
}
#--Find the per-county correlation-matrix CSVs and split their underscore-
#--delimited names into metadata columns, keeping only the chosen climate variable
files <- list.files(pattern = predictor)
filey <- do.call(rbind, strsplit(files, '[_]'))
filey <- subset(filey, filey[,5] == climate_var)
colnames(filey) <- c("state", "county", "commodity", "damage", "climate", "crop1", "crop2", "response", "crop3")
filey <- as.data.frame(filey)
#--Rebuild the file names from the parsed tokens and read each county's matrix
data <- with(filey, paste(state, "_", county, "_", commodity, "_", damage, "_", climate, "_", crop1, "_", crop2, "_", response, "_", crop3, sep=""))
tables <- lapply(data, read.csv, header = TRUE)
tables <- lapply(tables, function(x) { x["X"] <- NULL; x }) #--remove first index row from each list
tables <- lapply(tables, function(x) dplyr::arrange(x, -row_number())) #--(flips matrix - puts jan as 1st row and sept as 9th row)
#--Blank out the invalid upper-triangle of each county's correlation matrix:
#--row r (starting month) cannot pair with lag columns beyond the data window,
#--so columns (r + 3):12 of row r are NA (row 1 -> cols 4:12, ..., row 9 -> col 12).
#--Replaces nine hand-written assignments with the equivalent loop.
for (i in 1:26) {
  for (r in 1:9) {
    tables[[i]][r, (r + 3):12] <- NA
  }
}
#--Row index (month) of the user-selected ending month within each matrix
monthly <- match(monthend, tolower(month.abb))
#--For each county, locate the matrix cell holding the strongest expected
#--correlation with loss. Variables negatively associated with drought loss
#--(precipitation, minimum relative humidity, PDSI) use the minimum correlation;
#--all other variables (rmax, tmmx, tmin, fm100, fm1000, pet, and any future
#--additions) use the maximum. This replaces nine duplicated if/else branches
#--that differed only in the min/max call.
extreme_fun <- if (climate_var %in% c("pr", "rmin", "pdsi")) min else max
bestcounty <- matrix(NA, nrow = 26, ncol = 3)
for (i in 1:26) {
  best_val <- extreme_fun(tables[[i]], na.rm = TRUE)
  cell <- which(tables[[i]] == best_val, arr.ind = TRUE)
  bestcounty[i, 1] <- cell[1, 1]  # row: starting month
  bestcounty[i, 2] <- cell[1, 2]  # column: ending-month / lag index
  bestcounty[i, 3] <- best_val    # the correlation value itself
}
#--Convert the best-correlation row index (1-12) to a lowercase month abbreviation
bestcounty[,1] <- tolower(month.abb[bestcounty[,1]])
#--Attach county names parsed from the file names; columns become:
#--start month, ending-month/lag index, and the correlation value itself
bestcounty2 <- cbind(data.frame(filey$county), bestcounty)
colnames(bestcounty2) <- c("NAME", "MONTH", "ENDMONTH", "CORRELATION")
#new
#!!!!!!fix-row by column, or number of months by ending month
#--Pull the single correlation at the dashboard-selected cell
#--(row = monthend, column = monthnumber) for every county
table2 <- lapply(tables, function(x) x[monthly, as.numeric(monthnumber)])
table3 <- data.frame(matrix(unlist(table2), nrow=length(table2), byrow=T))
colnames(table3) <- "correlations"
#combined <- do.call(rbind , tables)
table4 <- cbind(filey, table3)
#if (predictor_var == "loss") {
#predictor_var <- "crop_commodity_loss"
#}
#--Keep county, commodity, damage cause, climate variable, and the selected correlation
table5 <- table4[c(2:5,10)]
colnames(table5) <- c("NAME", "COMMODITY", "DAMAGE", "climate", "correlations")
#table5$STATE_NAME <- state.name[match(table5[,1],state.abb)]
#--Study-area county polygons (WGS84)
#--NOTE(review): maptools::readShapePoly is deprecated; sf::st_read is the modern replacement
counties <- readShapePoly('/tmp/threestate_palouse.shp',
proj4string=CRS
("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
projection = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
#--Drop the two counties outside the 24-county iPNW study area
counties <- counties[counties$NAME != "Kootenai",]
counties <- counties[counties$NAME != "Clearwater",]
counties2 <- merge(counties, table5, by = "NAME" )
#--new
counties3 <- merge(counties2, bestcounty2, by = "NAME")
#--Label such as "jul2": best starting month plus ending-month index
counties3$MONTHCOMBO <- paste(counties3$MONTH, counties3$ENDMONTH, sep="")
#--new
#colorbrew <- list(color = brewer.pal(26, c("green", "blue", "yellow")))
#my_palette <- colorRampPalette(c("darkred", "lightyellow"))(n = 26)
#--CORRELATION comes out of the merges as a factor; convert via its levels
counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
#pal <- colorNumeric(rev(my_palette), na.color = "#ffffff", domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
#--For PDSI (negative correlations are the signal) use a reversed ramp for the
#--polygon fill, and a second non-reversed ramp (pal2) for the legend below
pal <- colorNumeric(colorRampPalette(c("red", "orange", "yellow", "lightyellow"))(11), na.color = "#ffffff",
domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
pal2 <- colorNumeric(colorRampPalette(c("lightyellow", "yellow", "orange", "red"))(11), na.color = "#ffffff",
domain = eval(parse(text=paste("counties3$", "CORRELATION", sep=""))))
#--Legend label formatter for leaflet that can reverse the cut order.
#--With reverse_order = TRUE, returns a formatter function that hands the
#--legend its break values sorted high-to-low; otherwise defers to
#--leaflet::labelFormat(), forwarding any arguments in `...`.
#--Fixes: top-level `=` assignment -> `<-`, `T` -> `TRUE`, and an explicit
#--return value rather than relying on the (invisible) result of `<-`.
myLabelFormat <- function(..., reverse_order = FALSE) {
  if (reverse_order) {
    function(type = "numeric", cuts) {
      # Sort the legend cuts in decreasing order so the legend reads top-down
      sort(cuts, decreasing = TRUE)
    }
  } else {
    labelFormat(...)
  }
}
#--
#colorss = colorRampPalette(brewer.pal(11,"Spectral"))
#finalcol <- colorss(len <- length(counties3$CORRELATION))
#finalcol2 <- topo.colors(length(counties3$CORRELATION))[order(order(counties3$CORRELATION))]
#cellselect <- paste(monthend, monthnumber, sep="")
#par(mfrow=c(1,4))
#layout(matrix(c(1,1,1,2), 1, 4, byrow = TRUE))
#par(mar = c(1,1,1,1) + 0.1)
#plot(counties3, col = finalcol2, xaxs="i", yaxs="i")
##text(coordinates(counties2), labels=round(counties2$correlations, 2), cex=1.5, col = "black")
#added from ID
#corre <- round(as.numeric(as.character(counties3$CORRELATION)), 2)
#text(coordinates(counties2), labels=paste(counties3$MONTHCOMBO, "\n", corre, sep=""), cex=1.5, col = "white", font = 2)
#--
#--Bounding box used to fit the leaflet viewport
exte <- extent(counties3)
library(htmltools)
#--CSS for a centered, semi-transparent map title control
tag.map.title <- tags$style(HTML("
.leaflet-control.map-title {
transform: translate(-50%,20%);
position: fixed !important;
left: 50%;
text-align: center;
padding-left: 10px;
padding-right: 10px;
background: rgba(255,255,255,0.75);
font-weight: bold;
font-size: 24px;
}
"))
title <- tags$div(
tag.map.title, HTML(paste("IPNW Correlation, Climate vs. ", predictor_var, " by County for ", climate_var, sep=""))
)
#--County centroids for label placement
lat_long <- coordinates(counties3)
#labels <- paste(counties3$MONTHCOMBO, as.character(round(counties3$CORRELATION, 2)), sep = "<br/>")
#counties3$CORRELATION <- as.numeric(levels(counties3$CORRELATION))[counties3$CORRELATION]
counties3a <- data.frame(counties3)
#--Two-line HTML label per county: correlation value over the month combo
labs <- lapply(seq(nrow(counties3a)), function(i) {
paste0(as.character(round(counties3a[i, "CORRELATION"],2)), '<br/>',
counties3a[i, "MONTHCOMBO"])
})
#--Choropleth (reversed fill ramp for PDSI); legend uses pal2 with a
#--decreasing label transform so values read high-to-low
map <- leaflet(data = counties3) %>%
addProviderTiles(providers$Hydda.Base) %>%
addProviderTiles(providers$Stamen.TonerLines) %>%
#addControl(title, position = "topleft", className="map-title") %>%
fitBounds(exte[1], exte[3], exte[2], exte[4]) %>% addPolygons(fillColor = ~pal(counties3$CORRELATION), fillOpacity = .8, weight = 2, stroke = TRUE, smoothFactor = 0.5, color = "black") %>%
addLabelOnlyMarkers(data = counties3, lng = lat_long[,1], lat = lat_long[,2], label = lapply(labs, HTML), labelOptions = labelOptions(noHide = TRUE, direction = 'middle', textOnly = TRUE, textsize = "14px", col = "white", style=list(
'font-family'= 'serif',
'font-style'= 'bold',
'font-size' = '14px'
))) %>%
addLegend(pal = pal2, values = counties3$CORRELATION, labels = c("1", "2"), opacity = .5, title = paste("Correlation", " Matrix", sep="<br>"),
position = "bottomright", labFormat = labelFormat(transform = function(x) sort(x, decreasing = TRUE)))
map
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
#--Panel function for pairs(): writes |r| (absolute Pearson correlation of the
#--panel's x/y) in the panel center, with text size scaled by the correlation
#--strength so stronger relationships stand out. Restores the user coordinate
#--system on exit.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  usr <- par("usr")
  on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  corr_abs <- abs(cor(x, y))
  # Formatting against a wide reference value keeps a stable digit width
  label <- paste0(prefix, format(c(corr_abs, 0.123456789), digits = digits)[1])
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(label)
  }
  text(0.5, 0.5, label, cex = cex.cor * corr_abs)
}
#--Commodity and damage-cause filters for this chapter's analysis
var_commodity <- "WHEAT"
var_damage <- "Drought"
#load wheat pricing
wheatprice <- read.csv("/tmp/wheatprice.csv", header = TRUE, strip.white = TRUE, sep = ",")
wheatprice <- wheatprice[-1]
colnames(wheatprice) <- c("year", "price")
#--accessing output of design matrix/time lag data based on monthly selection from dashboard runs
#URL <- "https://raw.githubusercontent.com/erichseamon/seamon_dissertation/master/data/climate_outputs/climate_outputs.zip"
#destfile <- "/tmp/climate_outputs.zip"
#download.file(URL, destfile)
#outDir<-"/tmp/climate_outputs"
#unzip(destfile,exdir=outDir)
setwd("/tmp/climate_outputs")
#--Each varN file holds one climate variable at its optimized month combination;
#--column 9 is renamed to "<climate>_zscore" using the variable name in column 2
var1 <- read.csv("pr_jun2_cube_root_loss_climatecorrelation.csv")
colnames(var1)[9] <- paste(colnames(var1)[2], "_zscore", sep="")
var2 <- read.csv("pet_jul3_cube_root_loss_climatecorrelation.csv")
colnames(var2)[9] <- paste(colnames(var2)[2], "_zscore", sep="")
var3 <- read.csv("tmmx_jul2_cube_root_loss_climatecorrelation.csv")
colnames(var3)[9] <- paste(colnames(var3)[2], "_zscore", sep="")
var4 <- read.csv("pr_jun2_cube_root_acres_climatecorrelation.csv")
colnames(var4)[9] <- paste(colnames(var4)[2], "_zscore", sep="")
var5 <- read.csv("pet_jun2_loss_per_acre_climatecorrelation.csv")
colnames(var5)[9] <- paste(colnames(var5)[2], "_zscore", sep="")
var6 <- read.csv("tmmx_jun1_cube_root_acres_climatecorrelation.csv")
colnames(var6)[9] <- paste(colnames(var6)[2], "_zscore", sep="")
var7 <- read.csv("pr_sep5_loss_climatedata.csv")
colnames(var7)[9] <- paste(colnames(var7)[2], "_zscore", sep="")
var8 <- read.csv("pet_sep5_loss_climatedata.csv")
colnames(var8)[9] <- paste(colnames(var8)[2], "_zscore", sep="")
var9 <- read.csv("tmmx_jul2_loss_climatedata.csv")
colnames(var9)[9] <- paste(colnames(var9)[2], "_zscore", sep="")
var9x <- read.csv("fm100_jul3_cube_root_loss_climatedata.csv")
colnames(var9x)[9] <- paste(colnames(var9x)[2], "_zscore", sep="")
var10x <- read.csv("fm1000_aug2_cube_root_loss_climatedata.csv")
colnames(var10x)[9] <- paste(colnames(var10x)[2], "_zscore", sep="")
var11x <- read.csv("pdsi_sep4_cube_root_loss_climatedata.csv")
colnames(var11x)[9] <- paste(colnames(var11x)[2], "_zscore", sep="")
var12x <- read.csv("pdsi_sep4_cube_root_loss_climatecorrelation.csv")
colnames(var12x)[9] <- paste(colnames(var12x)[2], "_zscore", sep="")
var7a <- read.csv("pr_sep5_loss_climatecorrelation.csv")
colnames(var7a)[9] <- paste(colnames(var7a)[2], "_zscore", sep="")
var8a <- read.csv("pet_sep5_loss_climatecorrelation.csv")
colnames(var8a)[9] <- paste(colnames(var8a)[2], "_zscore", sep="")
var9a <- read.csv("tmmx_jul2_loss_climatecorrelation.csv")
colnames(var9a)[9] <- paste(colnames(var9a)[2], "_zscore", sep="")
#--Assemble alternative predictor sets by binding the climate-value columns
data1 <- cbind(var1, var2[9], var3[9])
data2 <- cbind(var1[1:6], var2[2], var3[2])
data3 <- cbind(var4[1:6], var5[2], var6[2])
# NOTE(review): wheatprice_year is not defined anywhere in this chunk —
# presumably created earlier in the file; verify before relying on data3
data3 <- plyr::join(data3, wheatprice_year, by = "year")
data4 <- cbind(var7[1:6], var8[2], var9[2], var9x[2], var10x[2], var11x[2], var12x[3] )
#data4$prpet <- data4$pr / data4$pet
#--Join the loss data and wheat price by year/county, then drop incomplete rows
data4a <- dplyr::left_join(data4, var7a, by = c("year" = "year", "county" = "county"))
data4a <- dplyr::left_join(data4a, wheatprice, by = c("year"))
data4aa <- na.omit(data4a)
colnames(data4aa) <- c("X", "pr", "year", "pr_zscore", "damagecause", "county", "pet", "tmmx", "fm100", "fm1000", "pdsi", "cube_loss", "X.y", "pr2", "loss", "state", "commodity", "matrixnumber", "clim_zscore", "loss_zscore", "price")
#--Aridity proxy: precipitation over potential evapotranspiration
data4aa$prpet <- data4aa$pr / data4aa$pet
data4aa <- subset(data4aa, , c(year, damagecause, county, commodity, state, matrixnumber, cube_loss, pr, pdsi, pet, tmmx, fm100, fm1000, price, prpet))
#write.csv(data4aa, file = "/dmine/data/USDA/agmesh-scenarios/Allstates/summaries/lag_palouse1.csv")
#--Centered/scaled copies of the main numeric predictors (z-scores)
data4aa$pr_scaled <- scale(data4aa$pr, scale = TRUE, center = TRUE)
data4aa$tmmx_scaled <- scale(data4aa$tmmx, scale = TRUE, center = TRUE)
data4aa$pet_scaled <- scale(data4aa$pet, scale = TRUE, center = TRUE)
data4aa$pdsi_scaled <- scale(data4aa$pdsi, scale = TRUE, center = TRUE)
data4aa$price_scaled <- scale(data4aa$price, scale = TRUE, center = TRUE)
#percentage acreage by county and year, per state
library(plotrix)
library(ggplot2)
library(gridExtra)
#--Read the yearly iPNW climatology summaries for one state (2001-2015) and
#--stack them into a single data frame. Replaces 45 copy-pasted read.csv calls.
read_state_summaries <- function(state) {
  paths <- sprintf("/tmp/climatology/%s_%d_ipnw_summary", state, 2001:2015)
  frames <- lapply(paths, read.csv, header = TRUE, strip.white = TRUE, sep = ",")
  do.call(rbind, frames)
}
#--Sum every column within each county-year, then average across years per
#--county, matching the original two-step aggregate() pipeline.
summarize_by_county <- function(df) {
  yearly <- aggregate(. ~ countyfips + year, df, sum)
  aggregate(. ~ countyfips, yearly, mean)
}
#--Per-state county-level climatology means (same ID1/WA1/OR1 outputs as before)
ID1 <- summarize_by_county(read_state_summaries("Idaho"))
WA1 <- summarize_by_county(read_state_summaries("Washington"))
OR1 <- summarize_by_county(read_state_summaries("Oregon"))
#--County FIPS lookup table (code, county name, state)
countiesfips <- read.csv("/tmp/countiesfips.csv",
header = TRUE, strip.white = TRUE, sep = ",")
countiesfips <- countiesfips[-1]
colnames(countiesfips) <- c("countyfips", "county", "state")
#--Zero-pad FIPS codes to five characters so they match the climatology keys
countiesfips$countyfips <- sprintf("%05d", countiesfips$countyfips)
#--Attach county/state names to each state's climatology means
OR2 <- merge(countiesfips, OR1, by=("countyfips"))
ID2 <- merge(countiesfips, ID1, by=("countyfips"))
WA2 <- merge(countiesfips, WA1, by=("countyfips"))
rma1989 <- read.csv("/tmp/RMA_originaldata/1989.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1990 <- read.csv("/tmp/RMA_originaldata/1990.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1991 <- read.csv("/tmp/RMA_originaldata/1991.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1992 <- read.csv("/tmp/RMA_originaldata/1992.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1993 <- read.csv("/tmp/RMA_originaldata/1993.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1994 <- read.csv("/tmp/RMA_originaldata/1994.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1995 <- read.csv("/tmp/RMA_originaldata/1995.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1996 <- read.csv("/tmp/RMA_originaldata/1996.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1997 <- read.csv("/tmp/RMA_originaldata/1997.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1998 <- read.csv("/tmp/RMA_originaldata/1998.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma1999 <- read.csv("/tmp/RMA_originaldata/1999.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2000 <- read.csv("/tmp/RMA_originaldata/2000.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2001 <- read.csv("/tmp/RMA_originaldata/2001.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2002 <- read.csv("/tmp/RMA_originaldata/2002.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2003 <- read.csv("/tmp/RMA_originaldata/2003.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2004 <- read.csv("/tmp/RMA_originaldata/2004.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2005 <- read.csv("/tmp/RMA_originaldata/2005.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2006 <- read.csv("/tmp/RMA_originaldata/2006.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2007 <- read.csv("/tmp/RMA_originaldata/2007.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2008 <- read.csv("/tmp/RMA_originaldata/2008.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2009 <- read.csv("/tmp/RMA_originaldata/2009.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2010 <- read.csv("/tmp/RMA_originaldata/2010.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2011 <- read.csv("/tmp/RMA_originaldata/2011.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2012 <- read.csv("/tmp/RMA_originaldata/2012.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2013 <- read.csv("/tmp/RMA_originaldata/2013.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2014 <- read.csv("/tmp/RMA_originaldata/2014.txt", sep = "|", header = FALSE, strip.white = TRUE)
rma2015 <- read.csv("/tmp/RMA_originaldata/2015.txt", sep = "|", header = FALSE, strip.white = TRUE)
# Stack the yearly claim tables for 1989-2000 into one frame.
# (rma1989 through rma1995 are read in earlier in the script.)
RMA_1989 <- rbind(
  rma1989, rma1990, rma1991, rma1992, rma1993, rma1994,
  rma1995, rma1996, rma1997, rma1998, rma1999, rma2000
)
# Keep only the columns of interest. NOTE: the 1989-2000 layout carries no
# claim-acreage column, unlike the 2001-2015 files below.
RMA_1989 <- RMA_1989[, c(1, 3, 5, 7, 12, 13, 14, 15)]
colnames(RMA_1989) <- c("year", "state", "county", "commodity",
                        "damagecause", "monthcode", "month", "loss")
# Stack the yearly claim tables for 2001-2015; these include a claim-acreage
# column (column 16) in addition to the dollar loss.
RMA <- rbind(
  rma2001, rma2002, rma2003, rma2004, rma2005, rma2006, rma2007, rma2008,
  rma2009, rma2010, rma2011, rma2012, rma2013, rma2014, rma2015
)
# Keep only the columns of interest and give them readable names.
RMA <- RMA[, c(1, 3, 5, 7, 12, 13, 14, 15, 16)]
colnames(RMA) <- c("year", "state", "county", "commodity", "damagecause",
                   "monthcode", "month", "acres", "loss")
# Restrict both periods to the three Pacific Northwest states.
RMA_PNW <- subset(RMA, state %in% c("WA", "OR", "ID"))
RMA_PNW_1989 <- subset(RMA_1989, state %in% c("WA", "OR", "ID"))
# (dead code kept from an earlier approach that read per-state summary files)
#temp = list.files(pattern = paste("Idaho", "_summary", sep=""))
#myfiles = lapply(temp, read.csv, header = TRUE)
#ziggy.df <- do.call(rbind , myfiles)

# Normalize the PNW subset: character columns plus trimmed whitespace so the
# commodity / damage-cause string filters and merges below match reliably.
RMA_PNW <- as.data.frame(RMA_PNW)
RMA_PNW$county <- as.character(RMA_PNW$county)
RMA_PNW$damagecause <- as.character(RMA_PNW$damagecause)
# Snapshot taken BEFORE trimming (kept for parity with the original run).
xrange <- RMA_PNW
for (txt_col in c("commodity", "county", "damagecause")) {
  RMA_PNW[[txt_col]] <- trimws(RMA_PNW[[txt_col]])
}
# Per-state views consumed by the aggregation steps below.
ID_RMA_PNW <- subset(RMA_PNW, state == "ID")
OR_RMA_PNW <- subset(RMA_PNW, state == "OR")
WA_RMA_PNW <- subset(RMA_PNW, state == "WA")
#--OR
# Denominator: total claimed acreage for the chosen commodity across ALL
# damage causes, summed per year/county.
OR_loss_commodity <- subset(OR_RMA_PNW, commodity == var_commodity)
OR_grp_all <- list(OR_loss_commodity$year, OR_loss_commodity$county,
                   OR_loss_commodity$state)
OR_loss_all <- aggregate(OR_loss_commodity$acres, by = OR_grp_all, FUN = "sum")
colnames(OR_loss_all) <- c("year", "county", "state", "all_damagecause_acres")
# Numerator: claims for the chosen commodity AND damage cause (e.g.
# wheat/drought); aggregate dollar loss and acreage per year/county.
OR_loss1 <- subset(OR_RMA_PNW,
                   commodity == var_commodity & damagecause == var_damage)
OR_grp <- list(OR_loss1$year, OR_loss1$county, OR_loss1$state)
OR_loss2 <- aggregate(OR_loss1$loss, by = OR_grp, FUN = "sum")
OR_loss2_acres <- aggregate(OR_loss1$acres, by = OR_grp, FUN = "sum")
colnames(OR_loss2) <- c("year", "county", "state", "loss")
colnames(OR_loss2_acres) <- c("year", "county", "state", "acres")
# Both aggregates use the identical grouping, so their rows line up 1:1.
OR_loss2$acres <- OR_loss2_acres$acres
OR_loss2$lossperacre <- OR_loss2$loss / OR_loss2$acres
# Attach the county-level climate summaries (OR2, minus its 4th column).
OR_loss_climate <- merge(OR_loss2, OR2[-4], by = c("county", "state"))
OR_loss_climate_2 <- merge(OR_loss_all, OR_loss_climate,
                           by = c("year", "county", "state"))
#-WA
# Denominator: total claimed acreage for the chosen commodity across ALL
# damage causes, summed per year/county.
WA_loss_commodity <- subset(WA_RMA_PNW, commodity == var_commodity)
WA_grp_all <- list(WA_loss_commodity$year, WA_loss_commodity$county,
                   WA_loss_commodity$state)
WA_loss_all <- aggregate(WA_loss_commodity$acres, by = WA_grp_all, FUN = "sum")
colnames(WA_loss_all) <- c("year", "county", "state", "all_damagecause_acres")
# Numerator: claims for the chosen commodity AND damage cause; aggregate
# dollar loss and acreage per year/county.
WA_loss1 <- subset(WA_RMA_PNW,
                   commodity == var_commodity & damagecause == var_damage)
WA_grp <- list(WA_loss1$year, WA_loss1$county, WA_loss1$state)
WA_loss2 <- aggregate(WA_loss1$loss, by = WA_grp, FUN = "sum")
WA_loss2_acres <- aggregate(WA_loss1$acres, by = WA_grp, FUN = "sum")
colnames(WA_loss2) <- c("year", "county", "state", "loss")
colnames(WA_loss2_acres) <- c("year", "county", "state", "acres")
# Both aggregates use the identical grouping, so their rows line up 1:1.
WA_loss2$acres <- WA_loss2_acres$acres
WA_loss2$lossperacre <- WA_loss2$loss / WA_loss2$acres
# Attach the county-level climate summaries (WA2, minus its 4th column).
WA_loss_climate <- merge(WA_loss2, WA2[-4], by = c("county", "state"))
WA_loss_climate_2 <- merge(WA_loss_all, WA_loss_climate,
                           by = c("year", "county", "state"))
#-ID
# Denominator: total claimed acreage for the chosen commodity across ALL
# damage causes, summed per year/county.
ID_loss_commodity <- subset(ID_RMA_PNW, commodity == var_commodity)
ID_grp_all <- list(ID_loss_commodity$year, ID_loss_commodity$county,
                   ID_loss_commodity$state)
ID_loss_all <- aggregate(ID_loss_commodity$acres, by = ID_grp_all, FUN = "sum")
colnames(ID_loss_all) <- c("year", "county", "state", "all_damagecause_acres")
# Numerator: claims for the chosen commodity AND damage cause; aggregate
# dollar loss and acreage per year/county.
ID_loss1 <- subset(ID_RMA_PNW,
                   commodity == var_commodity & damagecause == var_damage)
ID_grp <- list(ID_loss1$year, ID_loss1$county, ID_loss1$state)
ID_loss2 <- aggregate(ID_loss1$loss, by = ID_grp, FUN = "sum")
ID_loss2_acres <- aggregate(ID_loss1$acres, by = ID_grp, FUN = "sum")
colnames(ID_loss2) <- c("year", "county", "state", "loss")
colnames(ID_loss2_acres) <- c("year", "county", "state", "acres")
# Both aggregates use the identical grouping, so their rows line up 1:1.
ID_loss2$acres <- ID_loss2_acres$acres
ID_loss2$lossperacre <- ID_loss2$loss / ID_loss2$acres
# Attach the county-level climate summaries (ID2, minus its 4th column).
ID_loss_climate <- merge(ID_loss2, ID2[-4], by = c("county", "state"))
ID_loss_climate_2 <- merge(ID_loss_all, ID_loss_climate,
                           by = c("year", "county", "state"))
# Pool the three state tables, then compute each county-year's drought share:
# drought-claimed acres as a fraction of acres claimed under any damage cause.
merged_iPNW <- do.call(rbind, list(ID_loss_climate_2, WA_loss_climate_2,
                                   OR_loss_climate_2))
merged_iPNW$pct_acreage <- with(merged_iPNW, acres / all_damagecause_acres)
#
#--plotting results of individual variables
# Pairs plot of the scaled predictors vs. cube-root loss, colored by state.
# (panel.cor / panel.smooth come from earlier in the document.)
pairs(cube_loss ~ pr_scaled + tmmx_scaled + pet_scaled + pdsi_scaled,
      data = data4aa, col = data4aa$state,
      lower.panel = panel.smooth, upper.panel = panel.cor,
      main = "initial pairs plot")
par(mar = c(12.7, 6.1, 4, 2.1), family = 'serif', mgp = c(4, 1, 0), las = 0)
data4aaaa <- data4aa
# Shared axis/typography settings for the per-variable scatterplots below.
scatter_theme <- theme(axis.title.y = element_text(family = "Serif", size=16)) +
  theme(axis.title.x = element_text(family = "Serif", size=16),
        axis.text.x = element_text(size=rel(1.5), hjust = 1, family = "Serif"),
        axis.text.y = element_text(size=rel(1.5), hjust = 1, family = "Serif"))
# One scatterplot per climate variable: points sized by commodity price,
# with a single loess smooth across all states combined (group = 1).
ggplot(data4aaaa, aes(pet_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled Potential Evapotranspiration (mm)",
       y = "Cube root loss ($)") +
  scatter_theme
ggplot(data4aaaa, aes(pr_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled Precipitation (mm)", y = "Cube root loss ($)") +
  scatter_theme
ggplot(data4aaaa, aes(tmmx_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled Max Temperature (C)", y = "Cube root loss ($)") +
  scatter_theme
ggplot(data4aaaa, aes(pdsi_scaled, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Scaled PDSI", y = "Cube root loss ($)") +
  scatter_theme
ggplot(data4aaaa, aes(prpet, cube_loss, color = state)) +
  geom_point(aes(size = price)) +
  stat_smooth(aes(group = 1), method = "loess") +
  labs(x = "Aridity Index (PR / PET)", y = "Cube root loss ($)") +
  scatter_theme
library(maptools)
library(RColorBrewer)
library(leaflet)
library(raster)
library(RCurl)
# Make big tree
# (earlier candidate model formulas, kept for reference)
#form <- as.formula(loss_zscore ~ pr_zscore + tmmx_zscore + pet_zscore)
#form2 <- as.formula(cube_loss ~ pr + tmmx + pet + pdsi + price)
#form2a <- as.formula(loss ~ pr + tmmx + pet + pdsi + price)
#form2b <- as.formula(cube_loss ~ pr.x + tmmx.x + pet.x + pdsi.x + price)
#form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th + erc + fm100 + fm1000 + price)
#form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + price)
#form3a <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + sph + srad + vs + th + erc + fm100 + fm1000 + price)
# Join the optimized climate/loss dataset (data4aaaa) with the acreage
# table built above, matched on state/county/year.
data44a <- data.frame(data4aaaa)
data44a <- merge(merged_iPNW, data44a, by=c("state", "county","year"))
# Keep the identifier columns separately so they can be re-attached to
# model predictions later.
data5a_statecountyear <- cbind.data.frame(data44a$state, data44a$county, data44a$year)
colnames(data5a_statecountyear) <- c("state", "county", "year")
# Convert max temperature from Kelvin to Celsius.
# NOTE(review): subtracts 273 rather than 273.15 (a 0.15 C offset) — confirm
# whether the approximation is intentional.
data44a$tmmx.x <- data44a$tmmx.x - 273
# Assemble the modeling frame. The ".x" suffixes select the columns coming
# from the first merge argument (merged_iPNW) where both sides share a
# name — presumably the intended side; verify against the merge above.
data5a <- cbind.data.frame(data44a$year, data44a$cube_loss, data44a$loss, data44a$pr_scaled, data44a$tmmx_scaled, data44a$fm100.x, data44a$fm1000.x, data44a$pdsi_scaled, data44a$erc, data44a$srad, data44a$vs, data44a$sph, data44a$th, data44a$pet_scaled, data44a$pct_acreage, data44a$price_scaled, data44a$state)
colnames(data5a) <- c("year", "cube_loss", "loss", "pr", "tmmx", "fm100", "fm1000", "pdsi", "erc", "srad", "vs", "sph", "th", "pet", "pct_acreage", "price", "state" )
data5a <- data.frame(data5a)
# Model dollar loss on a log10 scale.
data5a$loss <- log10(data5a$loss)
# Reduced predictor set used for the seasonal-loss ($) models.
data5ab <- cbind.data.frame(data5a$loss, data5a$pr, data5a$tmmx, data5a$pdsi, data5a$pet, data5a$price)
colnames(data5ab) <- c("loss", "pr", "tmmx", "pdsi", "pet", "price")
data5ab <- data.frame(data5ab)
#data5ab$loss <- log10(data5ab$loss)
# load libraries
library(caret)
library(rpart)
# define training control: 10-fold cross-validation with fold-level
# predictions retained for later inspection
train_control<- trainControl(method="repeatedcv", number=10, savePredictions = TRUE)
# Drop incomplete rows before modeling.
# NOTE(review): data5b loses rows here while data5a_statecountyear does not;
# the cbind.data.frame() calls further down assume equal row counts — confirm
# no NAs are actually dropped.
data5b <- data.frame(data5a)
data5b <- na.omit(data5b)
data5ab <- data.frame(data5ab)
data5ab <- na.omit(data5ab)
#set up train and test for two model datasets: 1) pct acreage and 2) seasonal loss
set.seed(9992)
# (earlier approach: plain random 80% sample)
#trainIndex <- sample(1:nrow(data5b), 0.8 * nrow(data5b))
# 75/25 splits stratified on each model's outcome.
trainIndex <- caret::createDataPartition(data5b$pct_acreage, p = 0.75, list = FALSE)
train <- data5b[trainIndex, ]
test <- data5b[-trainIndex, ]
trainIndex_loss <- caret::createDataPartition(data5ab$loss, p = 0.75, list = FALSE)
train_loss <- data5ab[trainIndex_loss, ]
test_loss <- data5ab[-trainIndex_loss, ]
# Candidate values of mtry (predictors sampled per split) for random forest.
rf_grid <- expand.grid(mtry = 2:5)
#model setup
#climate vs pct drought acreage model
form3 <- as.formula(pct_acreage ~ pr + tmmx + pet + pdsi + price)
# FIX(review): the original passed `ntrees = ...`, which randomForest silently
# absorbs via `...` and ignores (its argument is `ntree`), so the intended
# forest sizes never took effect. Corrected to `ntree` throughout.
model <- caret::train(form3, data = train, trControl = train_control,
                      importance = TRUE, method = "rf",
                      tuneGrid = rf_grid, ntree = 500)
# Single regression tree on the same formula, for comparison.
model_singular <- caret::train(form3, data = train, trControl = train_control,
                               method = "rpart")
# Same forest refit on ALL rows (no hold-out), used for later predictions.
model_loss_all_acreage <- caret::train(form3, data = data5b,
                                       trControl = train_control,
                                       method = "rf", ntree = 500,
                                       tuneGrid = rf_grid, importance = TRUE)
#climate vs seasonal loss ($)
form2a <- as.formula(loss ~ pr + tmmx + pet + pdsi + price)
model_loss <- caret::train(form2a, data = train_loss,
                           trControl = train_control, method = "rf",
                           tuneGrid = rf_grid, ntree = 1000,
                           importance = TRUE)
model_loss_all <- caret::train(form2a, data = data5b,
                               trControl = train_control, method = "rf",
                               ntree = 500, tuneGrid = rf_grid,
                               importance = TRUE)
#cforest_model_loss <- cforest(form2a,
# data = train_loss,
# controls=cforest_unbiased(ntree=2000, mtry=5))
#lattice::densityplot(model_loss,
# adjust = 1.25)
# FIX(review): $err.rate exists only for classification forests; a regression
# forest stores per-tree OOB error in $mse. Pick the tree count where the
# OOB error bottoms out.
tree_num <- which(model_loss$finalModel$mse == min(model_loss$finalModel$mse))
# Learning curve over increasing training-set fractions. (The exported caret
# function really is spelled "learing_curve_dat".)
lda_data <- learing_curve_dat(dat = model$trainingData,
                              outcome = ".outcome",
                              ## `train` arguments:
                              metric = "RMSE",
                              trControl = train_control,
                              method = "rf")
## Training for 10% (n = 26)
## Training for 20% (n = 52)
## Training for 30% (n = 79)
## Training for 40% (n = 105)
## Training for 50% (n = 132)
## Training for 60% (n = 158)
## Training for 70% (n = 184)
## Training for 80% (n = 211)
## Training for 90% (n = 237)
## Training for 100% (n = 264)
# Axis tick positions derived from the observed RMSE range.
pts <- pretty(lda_data$RMSE)
#pts <- c(0,0.1, 0.2, 0.3, 0.4)
# Relabel the resampling curve for the plot legend.
lda_data$Data[lda_data$Data == "Resampling"] <- "Validation"
# Smoothed train-vs-validation RMSE as the training fraction grows.
ggplot(lda_data, aes(x = Training_Size, y = RMSE, color = Data)) +
  geom_smooth(method = loess, span = .8) +
  theme_bw() +
  ggtitle("Random Forest Learning Curves: Train vs. Validation") +
  theme(axis.title.y = element_text(family = "Serif", size=18),
        axis.title.x = element_text(family = "Serif", size = 18),
        axis.text.x = element_text(size=rel(1.9), angle = 90, hjust = 1,
                                   family = "Serif"),
        axis.text.y = element_text(size=rel(1.9), hjust = 1,
                                   family = "Serif")) +
  theme(plot.title = element_text(family = "Serif", vjust = 2)) +
  theme(legend.text=element_text(family = "Serif", size=14)) +
  theme(legend.title=element_text(family = "Serif", size=16, face = "bold")) +
  # NOTE(review): this second plot.title theme overrides the family/vjust
  # set above — kept as-is to preserve the rendered output.
  theme(plot.title = element_text(size=24, face = "bold")) +
  scale_y_continuous(labels = pts, breaks = pts) +
  xlab("Training Size") +
  ylab("RMSE") +
  theme(legend.position="bottom") +
  scale_fill_discrete(name = "Legend")
# OOB RMSE of the seasonal-loss forest at its best point, and the tree
# index where the out-of-bag MSE is minimized.
best_tree <- which.min(model_loss$finalModel$mse)
sqrt(model_loss$finalModel$mse[best_tree])
## [1] 0.7158427
best_tree
## [1] 169
# Permutation variable importance (unscaled), ordered most-important first.
importance <- varImp(model_loss, scale = FALSE)
importance2 <- importance$importance
importance2$variable <- rownames(importance2)
importance2 <- importance2[order(importance2$Overall, decreasing = TRUE), ]
par(mar = c(6, 7, 2, 3), family = 'serif', mgp = c(5, 1, 0), las = 0)
# barplot() stacks bars bottom-up, so reverse the descending order to put
# the most important predictor at the top.
barplot(rev(importance2$Overall), horiz = TRUE, col = "lightblue",
        ylab = "predictor variables", xlab = "% importance",
        cex.lab = 1.7, las = 2, cex.axis = 1.8, cex.names = 1.8,
        names.arg = rev(importance2$variable))
#NRMSE
# Normalized root-mean-square error of predictions against observations.
#
# Args:
#   obs:  numeric vector of observed values.
#   pred: numeric vector of predicted values, same length as obs.
#   type: normalization for the RMSE — "sd" (default, divide by sd(obs)),
#         "mean", "maxmin" (range of obs), or "iq" (interquartile range).
# Returns:
#   The NRMSE rounded to 3 decimal places, as an unnamed scalar.
# Raises:
#   An error for an unknown `type`. (The original only message()d
#   "Wrong type!" and then failed with an unrelated
#   "object 'nrmse' not found" error.)
nrmse_func <- function(obs, pred, type = "sd") {
  if (!type %in% c("sd", "mean", "maxmin", "iq")) {
    stop("Wrong type!", call. = FALSE)
  }
  rmse <- sqrt(mean((obs - pred)^2))
  denom <- switch(type,
                  sd = sd(obs),
                  mean = mean(obs),
                  maxmin = max(obs) - min(obs),
                  # unname() so "iq" returns a bare scalar like the others
                  # (quantile() attaches a "75%" name)
                  iq = unname(quantile(obs, 0.75) - quantile(obs, 0.25)))
  round(rmse / denom, 3)
}
#--ensemble: fit several learners on shared resamples for later stacking
library(caretEnsemble)
# (earlier, larger candidate list kept for reference)
#algorithmList <- c('gbm', 'rpart', 'ctree', 'rf', 'HYFIS', 'FS.HGD', 'ANFIS' )
algorithmList <- c("gbm", "rpart", "ctree", "rf")
set.seed(100)
form2 <- as.formula(loss ~ pr + tmmx + pet + pdsi + price)
models <- caretList(form2, data = data5b, trControl = train_control,
                    methodList = algorithmList)
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8804 -nan 0.1000 0.0445
## 2 0.8384 -nan 0.1000 0.0385
## 3 0.8092 -nan 0.1000 0.0251
## 4 0.7767 -nan 0.1000 0.0318
## 5 0.7546 -nan 0.1000 0.0170
## 6 0.7343 -nan 0.1000 0.0150
## 7 0.7131 -nan 0.1000 0.0190
## 8 0.6982 -nan 0.1000 0.0106
## 9 0.6821 -nan 0.1000 0.0133
## 10 0.6666 -nan 0.1000 0.0130
## 20 0.5697 -nan 0.1000 0.0023
## 40 0.4944 -nan 0.1000 0.0010
## 60 0.4692 -nan 0.1000 -0.0045
## 80 0.4545 -nan 0.1000 -0.0011
## 100 0.4416 -nan 0.1000 0.0003
## 120 0.4271 -nan 0.1000 -0.0004
## 140 0.4155 -nan 0.1000 -0.0014
## 150 0.4114 -nan 0.1000 -0.0028
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8866 -nan 0.1000 0.0442
## 2 0.8248 -nan 0.1000 0.0448
## 3 0.7800 -nan 0.1000 0.0448
## 4 0.7392 -nan 0.1000 0.0344
## 5 0.7042 -nan 0.1000 0.0286
## 6 0.6734 -nan 0.1000 0.0226
## 7 0.6461 -nan 0.1000 0.0202
## 8 0.6221 -nan 0.1000 0.0162
## 9 0.5978 -nan 0.1000 0.0170
## 10 0.5865 -nan 0.1000 0.0107
## 20 0.4976 -nan 0.1000 -0.0009
## 40 0.4156 -nan 0.1000 -0.0028
## 60 0.3878 -nan 0.1000 -0.0021
## 80 0.3620 -nan 0.1000 -0.0021
## 100 0.3357 -nan 0.1000 0.0003
## 120 0.3170 -nan 0.1000 -0.0014
## 140 0.3031 -nan 0.1000 -0.0008
## 150 0.2970 -nan 0.1000 -0.0017
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8638 -nan 0.1000 0.0614
## 2 0.7999 -nan 0.1000 0.0505
## 3 0.7548 -nan 0.1000 0.0463
## 4 0.7118 -nan 0.1000 0.0401
## 5 0.6817 -nan 0.1000 0.0329
## 6 0.6482 -nan 0.1000 0.0213
## 7 0.6205 -nan 0.1000 0.0295
## 8 0.5979 -nan 0.1000 0.0234
## 9 0.5723 -nan 0.1000 0.0178
## 10 0.5594 -nan 0.1000 0.0074
## 20 0.4595 -nan 0.1000 0.0008
## 40 0.3764 -nan 0.1000 0.0024
## 60 0.3385 -nan 0.1000 -0.0023
## 80 0.3049 -nan 0.1000 -0.0022
## 100 0.2783 -nan 0.1000 -0.0024
## 120 0.2565 -nan 0.1000 -0.0015
## 140 0.2374 -nan 0.1000 -0.0016
## 150 0.2300 -nan 0.1000 -0.0029
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9049 -nan 0.1000 0.0345
## 2 0.8625 -nan 0.1000 0.0353
## 3 0.8286 -nan 0.1000 0.0270
## 4 0.8027 -nan 0.1000 0.0235
## 5 0.7766 -nan 0.1000 0.0204
## 6 0.7543 -nan 0.1000 0.0178
## 7 0.7332 -nan 0.1000 0.0125
## 8 0.7177 -nan 0.1000 0.0090
## 9 0.7023 -nan 0.1000 0.0119
## 10 0.6866 -nan 0.1000 0.0083
## 20 0.5871 -nan 0.1000 0.0000
## 40 0.5098 -nan 0.1000 -0.0002
## 60 0.4812 -nan 0.1000 -0.0012
## 80 0.4598 -nan 0.1000 -0.0007
## 100 0.4433 -nan 0.1000 -0.0003
## 120 0.4295 -nan 0.1000 -0.0012
## 140 0.4189 -nan 0.1000 -0.0009
## 150 0.4138 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8790 -nan 0.1000 0.0644
## 2 0.8264 -nan 0.1000 0.0404
## 3 0.7797 -nan 0.1000 0.0317
## 4 0.7439 -nan 0.1000 0.0306
## 5 0.7168 -nan 0.1000 0.0148
## 6 0.6803 -nan 0.1000 0.0301
## 7 0.6575 -nan 0.1000 0.0226
## 8 0.6356 -nan 0.1000 0.0092
## 9 0.6174 -nan 0.1000 0.0176
## 10 0.5969 -nan 0.1000 0.0133
## 20 0.4983 -nan 0.1000 0.0029
## 40 0.4231 -nan 0.1000 0.0043
## 60 0.3805 -nan 0.1000 -0.0021
## 80 0.3511 -nan 0.1000 -0.0024
## 100 0.3274 -nan 0.1000 -0.0023
## 120 0.3078 -nan 0.1000 -0.0018
## 140 0.2942 -nan 0.1000 -0.0026
## 150 0.2852 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8778 -nan 0.1000 0.0509
## 2 0.8188 -nan 0.1000 0.0442
## 3 0.7800 -nan 0.1000 0.0390
## 4 0.7369 -nan 0.1000 0.0296
## 5 0.6957 -nan 0.1000 0.0307
## 6 0.6627 -nan 0.1000 0.0247
## 7 0.6430 -nan 0.1000 0.0107
## 8 0.6173 -nan 0.1000 0.0078
## 9 0.5958 -nan 0.1000 0.0186
## 10 0.5759 -nan 0.1000 0.0075
## 20 0.4725 -nan 0.1000 -0.0038
## 40 0.3954 -nan 0.1000 -0.0025
## 60 0.3453 -nan 0.1000 -0.0028
## 80 0.3088 -nan 0.1000 -0.0029
## 100 0.2856 -nan 0.1000 -0.0016
## 120 0.2652 -nan 0.1000 -0.0051
## 140 0.2489 -nan 0.1000 -0.0006
## 150 0.2390 -nan 0.1000 -0.0042
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8981 -nan 0.1000 0.0432
## 2 0.8671 -nan 0.1000 0.0284
## 3 0.8330 -nan 0.1000 0.0170
## 4 0.8075 -nan 0.1000 0.0251
## 5 0.7857 -nan 0.1000 0.0195
## 6 0.7669 -nan 0.1000 0.0078
## 7 0.7510 -nan 0.1000 0.0126
## 8 0.7327 -nan 0.1000 0.0159
## 9 0.7156 -nan 0.1000 0.0125
## 10 0.7017 -nan 0.1000 0.0091
## 20 0.6100 -nan 0.1000 0.0035
## 40 0.5359 -nan 0.1000 -0.0001
## 60 0.5034 -nan 0.1000 -0.0005
## 80 0.4870 -nan 0.1000 -0.0035
## 100 0.4699 -nan 0.1000 -0.0016
## 120 0.4578 -nan 0.1000 -0.0023
## 140 0.4453 -nan 0.1000 -0.0015
## 150 0.4369 -nan 0.1000 -0.0022
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8901 -nan 0.1000 0.0440
## 2 0.8300 -nan 0.1000 0.0491
## 3 0.7909 -nan 0.1000 0.0358
## 4 0.7474 -nan 0.1000 0.0392
## 5 0.7120 -nan 0.1000 0.0322
## 6 0.6865 -nan 0.1000 0.0267
## 7 0.6652 -nan 0.1000 0.0175
## 8 0.6447 -nan 0.1000 0.0167
## 9 0.6262 -nan 0.1000 0.0118
## 10 0.6116 -nan 0.1000 0.0065
## 20 0.5185 -nan 0.1000 0.0029
## 40 0.4556 -nan 0.1000 -0.0010
## 60 0.4046 -nan 0.1000 -0.0017
## 80 0.3666 -nan 0.1000 -0.0013
## 100 0.3494 -nan 0.1000 -0.0016
## 120 0.3302 -nan 0.1000 -0.0011
## 140 0.3149 -nan 0.1000 -0.0030
## 150 0.3046 -nan 0.1000 -0.0018
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8765 -nan 0.1000 0.0542
## 2 0.8198 -nan 0.1000 0.0559
## 3 0.7741 -nan 0.1000 0.0306
## 4 0.7395 -nan 0.1000 0.0242
## 5 0.7056 -nan 0.1000 0.0258
## 6 0.6712 -nan 0.1000 0.0260
## 7 0.6489 -nan 0.1000 0.0142
## 8 0.6284 -nan 0.1000 0.0163
## 9 0.6099 -nan 0.1000 0.0069
## 10 0.5904 -nan 0.1000 0.0148
## 20 0.4852 -nan 0.1000 0.0107
## 40 0.3991 -nan 0.1000 -0.0000
## 60 0.3564 -nan 0.1000 -0.0003
## 80 0.3213 -nan 0.1000 -0.0032
## 100 0.2911 -nan 0.1000 -0.0025
## 120 0.2731 -nan 0.1000 -0.0021
## 140 0.2537 -nan 0.1000 -0.0022
## 150 0.2459 -nan 0.1000 -0.0023
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8573 -nan 0.1000 0.0354
## 2 0.8211 -nan 0.1000 0.0273
## 3 0.7848 -nan 0.1000 0.0297
## 4 0.7538 -nan 0.1000 0.0270
## 5 0.7328 -nan 0.1000 0.0197
## 6 0.7116 -nan 0.1000 0.0140
## 7 0.6945 -nan 0.1000 0.0125
## 8 0.6806 -nan 0.1000 0.0098
## 9 0.6616 -nan 0.1000 0.0069
## 10 0.6480 -nan 0.1000 0.0116
## 20 0.5574 -nan 0.1000 0.0032
## 40 0.4792 -nan 0.1000 0.0001
## 60 0.4519 -nan 0.1000 0.0004
## 80 0.4360 -nan 0.1000 -0.0006
## 100 0.4200 -nan 0.1000 -0.0016
## 120 0.4080 -nan 0.1000 -0.0004
## 140 0.3975 -nan 0.1000 -0.0020
## 150 0.3917 -nan 0.1000 -0.0007
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8347 -nan 0.1000 0.0590
## 2 0.7856 -nan 0.1000 0.0437
## 3 0.7432 -nan 0.1000 0.0328
## 4 0.7068 -nan 0.1000 0.0256
## 5 0.6798 -nan 0.1000 0.0251
## 6 0.6549 -nan 0.1000 0.0176
## 7 0.6329 -nan 0.1000 0.0153
## 8 0.6105 -nan 0.1000 0.0162
## 9 0.5987 -nan 0.1000 0.0042
## 10 0.5822 -nan 0.1000 0.0130
## 20 0.4843 -nan 0.1000 0.0029
## 40 0.4089 -nan 0.1000 -0.0004
## 60 0.3681 -nan 0.1000 0.0050
## 80 0.3416 -nan 0.1000 -0.0008
## 100 0.3191 -nan 0.1000 -0.0021
## 120 0.3019 -nan 0.1000 -0.0014
## 140 0.2830 -nan 0.1000 -0.0017
## 150 0.2749 -nan 0.1000 -0.0029
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8254 -nan 0.1000 0.0520
## 2 0.7696 -nan 0.1000 0.0428
## 3 0.7234 -nan 0.1000 0.0435
## 4 0.6783 -nan 0.1000 0.0329
## 5 0.6385 -nan 0.1000 0.0279
## 6 0.6147 -nan 0.1000 0.0165
## 7 0.5900 -nan 0.1000 0.0137
## 8 0.5662 -nan 0.1000 0.0132
## 9 0.5490 -nan 0.1000 0.0104
## 10 0.5333 -nan 0.1000 0.0093
## 20 0.4387 -nan 0.1000 -0.0019
## 40 0.3516 -nan 0.1000 -0.0038
## 60 0.3166 -nan 0.1000 -0.0027
## 80 0.2846 -nan 0.1000 -0.0014
## 100 0.2666 -nan 0.1000 -0.0040
## 120 0.2458 -nan 0.1000 -0.0015
## 140 0.2277 -nan 0.1000 -0.0010
## 150 0.2204 -nan 0.1000 -0.0013
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9107 -nan 0.1000 0.0496
## 2 0.8595 -nan 0.1000 0.0342
## 3 0.8271 -nan 0.1000 0.0276
## 4 0.8042 -nan 0.1000 0.0198
## 5 0.7759 -nan 0.1000 0.0220
## 6 0.7511 -nan 0.1000 0.0156
## 7 0.7283 -nan 0.1000 0.0106
## 8 0.7105 -nan 0.1000 0.0142
## 9 0.6946 -nan 0.1000 0.0136
## 10 0.6794 -nan 0.1000 0.0093
## 20 0.5776 -nan 0.1000 0.0042
## 40 0.4962 -nan 0.1000 -0.0008
## 60 0.4673 -nan 0.1000 -0.0012
## 80 0.4542 -nan 0.1000 -0.0022
## 100 0.4409 -nan 0.1000 -0.0008
## 120 0.4302 -nan 0.1000 -0.0021
## 140 0.4210 -nan 0.1000 -0.0018
## 150 0.4171 -nan 0.1000 -0.0011
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8892 -nan 0.1000 0.0602
## 2 0.8273 -nan 0.1000 0.0615
## 3 0.7819 -nan 0.1000 0.0467
## 4 0.7414 -nan 0.1000 0.0327
## 5 0.7120 -nan 0.1000 0.0189
## 6 0.6738 -nan 0.1000 0.0336
## 7 0.6410 -nan 0.1000 0.0205
## 8 0.6195 -nan 0.1000 0.0173
## 9 0.5982 -nan 0.1000 0.0137
## 10 0.5828 -nan 0.1000 0.0118
## 20 0.4880 -nan 0.1000 0.0002
## 40 0.4194 -nan 0.1000 -0.0023
## 60 0.3760 -nan 0.1000 -0.0005
## 80 0.3474 -nan 0.1000 0.0004
## 100 0.3248 -nan 0.1000 -0.0007
## 120 0.3101 -nan 0.1000 -0.0023
## 140 0.2958 -nan 0.1000 0.0006
## 150 0.2900 -nan 0.1000 -0.0024
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8843 -nan 0.1000 0.0629
## 2 0.8105 -nan 0.1000 0.0512
## 3 0.7592 -nan 0.1000 0.0305
## 4 0.7203 -nan 0.1000 0.0344
## 5 0.6804 -nan 0.1000 0.0372
## 6 0.6514 -nan 0.1000 0.0253
## 7 0.6255 -nan 0.1000 0.0217
## 8 0.6005 -nan 0.1000 0.0161
## 9 0.5792 -nan 0.1000 0.0168
## 10 0.5629 -nan 0.1000 0.0047
## 20 0.4509 -nan 0.1000 0.0021
## 40 0.3644 -nan 0.1000 -0.0002
## 60 0.3254 -nan 0.1000 -0.0027
## 80 0.2998 -nan 0.1000 -0.0047
## 100 0.2784 -nan 0.1000 -0.0031
## 120 0.2550 -nan 0.1000 -0.0029
## 140 0.2401 -nan 0.1000 -0.0025
## 150 0.2332 -nan 0.1000 -0.0014
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8624 -nan 0.1000 0.0517
## 2 0.8246 -nan 0.1000 0.0338
## 3 0.7889 -nan 0.1000 0.0284
## 4 0.7622 -nan 0.1000 0.0181
## 5 0.7386 -nan 0.1000 0.0174
## 6 0.7147 -nan 0.1000 0.0202
## 7 0.6933 -nan 0.1000 0.0123
## 8 0.6780 -nan 0.1000 0.0115
## 9 0.6593 -nan 0.1000 0.0154
## 10 0.6438 -nan 0.1000 0.0140
## 20 0.5475 -nan 0.1000 0.0005
## 40 0.4665 -nan 0.1000 0.0006
## 60 0.4415 -nan 0.1000 -0.0019
## 80 0.4258 -nan 0.1000 -0.0012
## 100 0.4136 -nan 0.1000 -0.0012
## 120 0.4022 -nan 0.1000 -0.0016
## 140 0.3900 -nan 0.1000 -0.0007
## 150 0.3874 -nan 0.1000 -0.0018
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8492 -nan 0.1000 0.0613
## 2 0.7993 -nan 0.1000 0.0324
## 3 0.7456 -nan 0.1000 0.0346
## 4 0.7027 -nan 0.1000 0.0322
## 5 0.6661 -nan 0.1000 0.0288
## 6 0.6406 -nan 0.1000 0.0180
## 7 0.6197 -nan 0.1000 0.0114
## 8 0.5968 -nan 0.1000 0.0168
## 9 0.5757 -nan 0.1000 0.0165
## 10 0.5624 -nan 0.1000 0.0101
## 20 0.4755 -nan 0.1000 0.0066
## 40 0.3972 -nan 0.1000 0.0046
## 60 0.3630 -nan 0.1000 -0.0037
## 80 0.3400 -nan 0.1000 -0.0030
## 100 0.3167 -nan 0.1000 -0.0012
## 120 0.3012 -nan 0.1000 -0.0011
## 140 0.2891 -nan 0.1000 -0.0010
## 150 0.2827 -nan 0.1000 -0.0028
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8535 -nan 0.1000 0.0641
## 2 0.7912 -nan 0.1000 0.0547
## 3 0.7363 -nan 0.1000 0.0460
## 4 0.6936 -nan 0.1000 0.0347
## 5 0.6549 -nan 0.1000 0.0341
## 6 0.6223 -nan 0.1000 0.0237
## 7 0.5952 -nan 0.1000 0.0150
## 8 0.5750 -nan 0.1000 0.0175
## 9 0.5570 -nan 0.1000 0.0058
## 10 0.5404 -nan 0.1000 0.0111
## 20 0.4380 -nan 0.1000 -0.0050
## 40 0.3533 -nan 0.1000 -0.0011
## 60 0.3154 -nan 0.1000 0.0008
## 80 0.2835 -nan 0.1000 -0.0033
## 100 0.2635 -nan 0.1000 -0.0023
## 120 0.2453 -nan 0.1000 -0.0021
## 140 0.2312 -nan 0.1000 -0.0026
## 150 0.2231 -nan 0.1000 -0.0009
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9076 -nan 0.1000 0.0442
## 2 0.8690 -nan 0.1000 0.0404
## 3 0.8333 -nan 0.1000 0.0334
## 4 0.8057 -nan 0.1000 0.0247
## 5 0.7836 -nan 0.1000 0.0167
## 6 0.7640 -nan 0.1000 0.0145
## 7 0.7449 -nan 0.1000 0.0148
## 8 0.7249 -nan 0.1000 0.0074
## 9 0.7079 -nan 0.1000 0.0130
## 10 0.6930 -nan 0.1000 0.0115
## 20 0.5859 -nan 0.1000 -0.0057
## 40 0.5048 -nan 0.1000 0.0009
## 60 0.4771 -nan 0.1000 -0.0010
## 80 0.4628 -nan 0.1000 -0.0021
## 100 0.4476 -nan 0.1000 -0.0001
## 120 0.4347 -nan 0.1000 -0.0015
## 140 0.4231 -nan 0.1000 -0.0012
## 150 0.4189 -nan 0.1000 -0.0033
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8765 -nan 0.1000 0.0645
## 2 0.8176 -nan 0.1000 0.0548
## 3 0.7810 -nan 0.1000 0.0447
## 4 0.7386 -nan 0.1000 0.0269
## 5 0.6964 -nan 0.1000 0.0304
## 6 0.6662 -nan 0.1000 0.0280
## 7 0.6491 -nan 0.1000 0.0119
## 8 0.6245 -nan 0.1000 0.0181
## 9 0.6036 -nan 0.1000 0.0148
## 10 0.5876 -nan 0.1000 0.0118
## 20 0.4952 -nan 0.1000 -0.0007
## 40 0.4327 -nan 0.1000 -0.0051
## 60 0.4009 -nan 0.1000 -0.0045
## 80 0.3665 -nan 0.1000 -0.0053
## 100 0.3426 -nan 0.1000 -0.0015
## 120 0.3244 -nan 0.1000 -0.0015
## 140 0.3108 -nan 0.1000 -0.0033
## 150 0.3043 -nan 0.1000 -0.0031
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8822 -nan 0.1000 0.0455
## 2 0.8185 -nan 0.1000 0.0538
## 3 0.7595 -nan 0.1000 0.0536
## 4 0.7150 -nan 0.1000 0.0381
## 5 0.6805 -nan 0.1000 0.0284
## 6 0.6476 -nan 0.1000 0.0227
## 7 0.6258 -nan 0.1000 0.0181
## 8 0.6049 -nan 0.1000 0.0138
## 9 0.5828 -nan 0.1000 0.0153
## 10 0.5670 -nan 0.1000 0.0121
## 20 0.4593 -nan 0.1000 -0.0013
## 40 0.3827 -nan 0.1000 -0.0043
## 60 0.3404 -nan 0.1000 0.0010
## 80 0.3042 -nan 0.1000 -0.0013
## 100 0.2809 -nan 0.1000 -0.0021
## 120 0.2634 -nan 0.1000 -0.0033
## 140 0.2437 -nan 0.1000 -0.0034
## 150 0.2358 -nan 0.1000 -0.0027
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8979 -nan 0.1000 0.0129
## 2 0.8565 -nan 0.1000 0.0330
## 3 0.8201 -nan 0.1000 0.0211
## 4 0.7903 -nan 0.1000 0.0271
## 5 0.7709 -nan 0.1000 0.0092
## 6 0.7459 -nan 0.1000 0.0197
## 7 0.7255 -nan 0.1000 0.0147
## 8 0.7062 -nan 0.1000 0.0134
## 9 0.6906 -nan 0.1000 0.0133
## 10 0.6769 -nan 0.1000 0.0079
## 20 0.5881 -nan 0.1000 0.0020
## 40 0.5088 -nan 0.1000 0.0024
## 60 0.4817 -nan 0.1000 -0.0025
## 80 0.4619 -nan 0.1000 -0.0012
## 100 0.4496 -nan 0.1000 -0.0007
## 120 0.4372 -nan 0.1000 -0.0004
## 140 0.4273 -nan 0.1000 -0.0007
## 150 0.4208 -nan 0.1000 -0.0013
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8712 -nan 0.1000 0.0478
## 2 0.8216 -nan 0.1000 0.0458
## 3 0.7734 -nan 0.1000 0.0370
## 4 0.7355 -nan 0.1000 0.0294
## 5 0.7017 -nan 0.1000 0.0276
## 6 0.6768 -nan 0.1000 0.0151
## 7 0.6556 -nan 0.1000 0.0212
## 8 0.6399 -nan 0.1000 0.0132
## 9 0.6226 -nan 0.1000 0.0076
## 10 0.6056 -nan 0.1000 0.0133
## 20 0.5055 -nan 0.1000 0.0019
## 40 0.4321 -nan 0.1000 -0.0015
## 60 0.3802 -nan 0.1000 -0.0024
## 80 0.3566 -nan 0.1000 -0.0011
## 100 0.3366 -nan 0.1000 -0.0017
## 120 0.3172 -nan 0.1000 -0.0027
## 140 0.3001 -nan 0.1000 -0.0047
## 150 0.2922 -nan 0.1000 -0.0019
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8635 -nan 0.1000 0.0538
## 2 0.8074 -nan 0.1000 0.0460
## 3 0.7527 -nan 0.1000 0.0388
## 4 0.7121 -nan 0.1000 0.0333
## 5 0.6743 -nan 0.1000 0.0243
## 6 0.6444 -nan 0.1000 0.0190
## 7 0.6173 -nan 0.1000 0.0225
## 8 0.5973 -nan 0.1000 0.0165
## 9 0.5827 -nan 0.1000 0.0072
## 10 0.5658 -nan 0.1000 0.0085
## 20 0.4678 -nan 0.1000 0.0075
## 40 0.3918 -nan 0.1000 0.0003
## 60 0.3358 -nan 0.1000 -0.0028
## 80 0.3035 -nan 0.1000 -0.0021
## 100 0.2820 -nan 0.1000 -0.0042
## 120 0.2644 -nan 0.1000 -0.0025
## 140 0.2492 -nan 0.1000 -0.0031
## 150 0.2425 -nan 0.1000 -0.0015
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9036 -nan 0.1000 0.0498
## 2 0.8754 -nan 0.1000 0.0180
## 3 0.8361 -nan 0.1000 0.0308
## 4 0.7975 -nan 0.1000 0.0264
## 5 0.7710 -nan 0.1000 0.0226
## 6 0.7495 -nan 0.1000 0.0119
## 7 0.7336 -nan 0.1000 0.0104
## 8 0.7180 -nan 0.1000 0.0133
## 9 0.6987 -nan 0.1000 0.0112
## 10 0.6832 -nan 0.1000 0.0084
## 20 0.5913 -nan 0.1000 0.0031
## 40 0.5183 -nan 0.1000 -0.0033
## 60 0.4946 -nan 0.1000 -0.0030
## 80 0.4804 -nan 0.1000 0.0003
## 100 0.4629 -nan 0.1000 -0.0000
## 120 0.4509 -nan 0.1000 -0.0031
## 140 0.4402 -nan 0.1000 0.0005
## 150 0.4350 -nan 0.1000 -0.0013
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.9018 -nan 0.1000 0.0434
## 2 0.8505 -nan 0.1000 0.0431
## 3 0.8108 -nan 0.1000 0.0436
## 4 0.7694 -nan 0.1000 0.0257
## 5 0.7298 -nan 0.1000 0.0377
## 6 0.7011 -nan 0.1000 0.0141
## 7 0.6755 -nan 0.1000 0.0210
## 8 0.6512 -nan 0.1000 0.0183
## 9 0.6321 -nan 0.1000 0.0149
## 10 0.6188 -nan 0.1000 0.0061
## 20 0.5226 -nan 0.1000 0.0020
## 40 0.4473 -nan 0.1000 0.0038
## 60 0.4031 -nan 0.1000 -0.0031
## 80 0.3750 -nan 0.1000 -0.0032
## 100 0.3522 -nan 0.1000 -0.0008
## 120 0.3347 -nan 0.1000 -0.0042
## 140 0.3177 -nan 0.1000 -0.0008
## 150 0.3114 -nan 0.1000 -0.0032
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8880 -nan 0.1000 0.0625
## 2 0.8316 -nan 0.1000 0.0414
## 3 0.7792 -nan 0.1000 0.0421
## 4 0.7374 -nan 0.1000 0.0280
## 5 0.7143 -nan 0.1000 0.0160
## 6 0.6818 -nan 0.1000 0.0262
## 7 0.6534 -nan 0.1000 0.0184
## 8 0.6287 -nan 0.1000 0.0144
## 9 0.6035 -nan 0.1000 0.0187
## 10 0.5835 -nan 0.1000 0.0117
## 20 0.4784 -nan 0.1000 -0.0018
## 40 0.3915 -nan 0.1000 0.0004
## 60 0.3461 -nan 0.1000 -0.0031
## 80 0.3097 -nan 0.1000 -0.0024
## 100 0.2859 -nan 0.1000 -0.0008
## 120 0.2672 -nan 0.1000 -0.0033
## 140 0.2486 -nan 0.1000 -0.0037
## 150 0.2410 -nan 0.1000 -0.0024
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8707 -nan 0.1000 0.0344
## 2 0.8359 -nan 0.1000 0.0369
## 3 0.8021 -nan 0.1000 0.0251
## 4 0.7729 -nan 0.1000 0.0259
## 5 0.7479 -nan 0.1000 0.0118
## 6 0.7232 -nan 0.1000 0.0153
## 7 0.7056 -nan 0.1000 0.0104
## 8 0.6930 -nan 0.1000 0.0097
## 9 0.6781 -nan 0.1000 0.0080
## 10 0.6619 -nan 0.1000 0.0124
## 20 0.5668 -nan 0.1000 0.0007
## 40 0.4918 -nan 0.1000 -0.0010
## 60 0.4618 -nan 0.1000 -0.0007
## 80 0.4468 -nan 0.1000 -0.0013
## 100 0.4342 -nan 0.1000 -0.0014
## 120 0.4195 -nan 0.1000 -0.0030
## 140 0.4090 -nan 0.1000 -0.0012
## 150 0.4039 -nan 0.1000 -0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8536 -nan 0.1000 0.0515
## 2 0.7973 -nan 0.1000 0.0480
## 3 0.7527 -nan 0.1000 0.0428
## 4 0.7112 -nan 0.1000 0.0341
## 5 0.6843 -nan 0.1000 0.0230
## 6 0.6554 -nan 0.1000 0.0261
## 7 0.6296 -nan 0.1000 0.0177
## 8 0.6078 -nan 0.1000 0.0169
## 9 0.5942 -nan 0.1000 0.0063
## 10 0.5766 -nan 0.1000 0.0130
## 20 0.4825 -nan 0.1000 0.0009
## 40 0.4100 -nan 0.1000 -0.0029
## 60 0.3738 -nan 0.1000 -0.0000
## 80 0.3467 -nan 0.1000 -0.0009
## 100 0.3233 -nan 0.1000 -0.0041
## 120 0.3034 -nan 0.1000 -0.0011
## 140 0.2878 -nan 0.1000 -0.0017
## 150 0.2819 -nan 0.1000 -0.0013
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8581 -nan 0.1000 0.0583
## 2 0.8005 -nan 0.1000 0.0562
## 3 0.7501 -nan 0.1000 0.0376
## 4 0.7090 -nan 0.1000 0.0342
## 5 0.6799 -nan 0.1000 0.0236
## 6 0.6425 -nan 0.1000 0.0339
## 7 0.6091 -nan 0.1000 0.0250
## 8 0.5819 -nan 0.1000 0.0235
## 9 0.5635 -nan 0.1000 0.0137
## 10 0.5457 -nan 0.1000 0.0084
## 20 0.4437 -nan 0.1000 -0.0014
## 40 0.3575 -nan 0.1000 -0.0048
## 60 0.3123 -nan 0.1000 -0.0030
## 80 0.2824 -nan 0.1000 -0.0025
## 100 0.2625 -nan 0.1000 -0.0027
## 120 0.2451 -nan 0.1000 -0.0024
## 140 0.2288 -nan 0.1000 -0.0031
## 150 0.2215 -nan 0.1000 -0.0024
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.8610 -nan 0.1000 0.0609
## 2 0.8017 -nan 0.1000 0.0432
## 3 0.7509 -nan 0.1000 0.0403
## 4 0.7133 -nan 0.1000 0.0386
## 5 0.6762 -nan 0.1000 0.0263
## 6 0.6457 -nan 0.1000 0.0206
## 7 0.6178 -nan 0.1000 0.0191
## 8 0.5929 -nan 0.1000 0.0186
## 9 0.5743 -nan 0.1000 0.0171
## 10 0.5546 -nan 0.1000 0.0091
## 20 0.4496 -nan 0.1000 -0.0005
## 40 0.3775 -nan 0.1000 -0.0004
## 60 0.3361 -nan 0.1000 -0.0012
## 80 0.3069 -nan 0.1000 -0.0017
## 100 0.2878 -nan 0.1000 -0.0033
# Collect the cross-validated resampling results from the four base
# learners and summarize RMSE / R-squared across folds.
results <- resamples(models)
summary(results)
##
## Call:
## summary.resamples(object = results)
##
## Models: gbm, rpart, ctree, rf
## Number of resamples: 10
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 0.5167913 0.6348819 0.6569215 0.6723025 0.7532790 0.8221813 0
## rpart 0.6942112 0.7423481 0.8424284 0.8175915 0.8693980 0.9740700 0
## ctree 0.6229102 0.7157282 0.7902009 0.7797358 0.8517393 0.8896204 0
## rf 0.5470982 0.6434738 0.7093379 0.7006481 0.7698019 0.8479182 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## gbm 0.3462222 0.4584156 0.5000016 0.5230262 0.5902535 0.7061177 0
## rpart 0.1059770 0.1840706 0.3076549 0.3084186 0.4424838 0.5321110 0
## ctree 0.1591679 0.3033759 0.3631622 0.3753717 0.4630281 0.5684643 0
## rf 0.3142846 0.3964337 0.4612825 0.4752131 0.5504524 0.6766647 0
set.seed(101)
# Reuse the same 10-fold CV control for the stacking layer.
stackControl <- train_control
# Ensemble the predictions of `models` to form a new combined prediction based on glm
stack.glm <- caretStack(models, method="glm", trControl=stackControl)
print(stack.glm)
## A glm ensemble of 2 base models: gbm, rpart, ctree, rf
##
## Ensemble results:
## Generalized Linear Model
##
## 348 samples
## 4 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times)
## Summary of sample sizes: 312, 313, 313, 312, 314, 312, ...
## Resampling results:
##
## RMSE Rsquared
## 0.6825263 0.517079
##
##
# Generate predictions on the full modeling dataset (data5b) and re-attach
# the state/county/year identifier columns for plotting and diagnostics.
# make predictions for acreage using random forest
predictions <- predict(model, data5b)
# make predictions for seasonal loss using random forest
predictions_loss <- predict(model_loss, data5b)
# make predictions for seasonal loss using ensemble
predictions_ensemble <- predict(stack.glm, data5b)
#cforest_Prediction <- predict(cforest_model_loss, newdata=test_loss, OOB=TRUE, type = "response")
predictions_loss_all <- predict(model_loss_all, data5b)
#--end ensemble
# append predictions to the predictor data
mydat <- cbind(data5b, predictions)
mydat_loss <- cbind(data5b, predictions_loss)
#cforest_mydat_loss <- cbind(test_loss,cforest_Prediction)
mydat_loss_all <- cbind(data5b, predictions_loss_all)
mydat_loss_ensemble <- cbind(data5b, predictions_ensemble)
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
# Drop the two counties excluded from the per-county diagnostics
finaldat <- subset(finaldat, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat$county)
# BUG FIX: nrow was hard-coded to 24, but two counties are removed above,
# leaving 22 -- assigning a 22-element county vector into a 24-row frame
# errors. Size the results frame from countyvector instead.
county_rmse_r2 <- data.frame(matrix(ncol = 2, nrow = length(countyvector)))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "county")
# Per-county diagnostics and plots: percent acreage drought claims,
# historical (red) vs. model-predicted (blue), 2001-2015.
# Helper metrics, defined once rather than redefined on every iteration.
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
MAE <- function(m, o) {
  mean(abs(m - o))
}
ii <- 0
for (i in countyvector) {
  ii <- ii + 1
  countyplot <- subset(finaldat, county == i)
  # 2.1. Average of actual data
  avr_y_actual <- mean(countyplot$pct_acreage)
  # 2.2. Total sum of squares
  ss_total <- sum((countyplot$pct_acreage - avr_y_actual)^2)
  # 2.3. Regression sum of squares
  ss_regression <- sum((countyplot$predictions - avr_y_actual)^2)
  # 2.4. Residual sum of squares
  ss_residuals <- sum((countyplot$pct_acreage - countyplot$predictions)^2)
  # 3. R2 Score
  r2 <- round(1 - ss_residuals / ss_total, 2)
  rmse <- round(RMSE(countyplot$predictions, countyplot$pct_acreage), 2)
  mae <- round(MAE(countyplot$predictions, countyplot$pct_acreage), 2)
  county_rmse_r2[ii, 1] <- rmse
  county_rmse_r2[ii, 2] <- r2
  # NOTE: removed dead code from the original loop body -- the finaldat*
  # frames were rebuilt every iteration (they are rebuilt once after the
  # loop anyway), and NRMSE1/NRMSE2/fit1 were overwritten each pass and
  # recomputed after the loop, so none of it affected results.
  # pct_acreage historical vs. predicted: pad to the full 2001-2015 span so
  # every county plots on the same axis; years with no claims plot as 0.
  yearlist <- data.frame(year = 2001:2015)
  countyplot <- merge(countyplot, yearlist, by = "year", all = TRUE)
  countyplot$predictions[is.na(countyplot$predictions)] <- 0
  countyplot$pct_acreage[is.na(countyplot$pct_acreage)] <- 0
  par(mar = c(5, 5.1, 2, 3), family = 'serif', mgp = c(3.8, 1, 0), las = 0)
  plot(c(countyplot$pct_acreage) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red",
       las = 2, xaxt = "n", xlab = "Year", ylab = "Percent Acreage Drought Claims",
       main = paste(i, " County, ", countyplot$state[1], ": R2 = ", r2, " RMSE = ", rmse, sep = ""))
  lines(countyplot$pct_acreage ~ countyplot$year, col = "red")
  points(countyplot$predictions ~ countyplot$year, col = "blue")
  lines(countyplot$predictions ~ countyplot$year, col = "blue")
  axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)
  legend("topright", legend = c("historical", "predicted"),
         col = c("red", "blue"), lty = 1, cex = 1.5)
}
# Region-wide summary: annual wheat/drought loss for all 24 counties
# combined, historical vs. predicted.
# Rebuild the full (unsubset) prediction frames -- the per-county loop above
# worked on a 22-county subset.
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
NRMSE1 <- nrmse_func(finaldat$loss, finaldat$predictions)
NRMSE2 <- nrmse_func(finaldat_loss$loss, finaldat_loss$predictions_loss)
# loss historical vs predicted
# (The original rebuilt the three frames a second time here with identical
# values; that redundant rebuild is removed.)
# Back-transform from log10 dollars to dollars before aggregating by year.
finaldat_loss$loss <- 10^finaldat_loss$loss
finaldat_loss$predictions_loss <- 10^finaldat_loss$predictions_loss
fd_loss <- aggregate(finaldat_loss$loss, by = list(finaldat_loss$year), FUN = "sum")
fd_pred <- aggregate(finaldat_loss$predictions_loss, by = list(finaldat_loss$year), FUN = "sum")
# 2.1. Average of actual data
avr_y_actual <- mean(fd_loss$x)
# 2.2. Total sum of squares
ss_total <- sum((fd_loss$x - avr_y_actual)^2)
# 2.3. Regression sum of squares
ss_regression <- sum((fd_pred$x - avr_y_actual)^2)
# 2.4. Residual sum of squares
ss_residuals <- sum((fd_loss$x - fd_pred$x)^2)
# 3. R2 Score
r2 <- round(1 - ss_residuals / ss_total, 2)
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
rmse <- round(RMSE(fd_pred$x, fd_loss$x), 2)
# plot all counties combined, by year
par(mar = c(6, 5.1, 2, 3), family = 'serif', mgp = c(3.5, .6, 0), las = 0)
plot(fd_loss$x ~ fd_loss$Group.1, col = "red", cex.lab = 1.5, cex.axis = 1.3, las = 2, yaxt = "n", xaxt = "n",
     xlab = "Year", ylab = "Annual Wheat/Drought Loss",
     main = paste("24 county iPNW study area: R2 = ", r2, " RMSE = ", rmse, sep = ""))
lines(fd_loss$x ~ fd_loss$Group.1, col = "red")
points(fd_pred$x ~ fd_pred$Group.1, col = "blue")
lines(fd_pred$x ~ fd_pred$Group.1, col = "blue")
axis(1, fd_loss$Group.1, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.3)
# BUG FIX: the original called pretty() twice (on x/1e6 for labels and on x
# for positions); the two calls can return vectors of different lengths or
# non-matching values, mislabeling the y axis. Derive the tick positions
# from the same pretty() call as the "<n>M" labels.
pts <- pretty(fd_loss$x / 1000000)
pts2 <- pts * 1000000
axis(2, las = 1, cex.axis = 1.3, at = pts2, labels = paste0(pts, "M"))
legend("topleft", legend = c("historical", "predicted"),
       col = c("red", "blue"), lty = 1, cex = 1.3)
# Per-county diagnostics and plots: wheat/drought insurance loss in dollars,
# historical (red) vs. predicted (blue), 2001-2015.
# Rebuild the prediction frames and back-transform log10 losses to dollars.
finaldat <- cbind.data.frame(data5a_statecountyear, mydat)
finaldat_loss <- cbind.data.frame(data5a_statecountyear, mydat_loss)
finaldat_loss_all <- cbind.data.frame(data5a_statecountyear, mydat_loss_all)
finaldat_loss$loss <- 10^finaldat_loss$loss
finaldat_loss$predictions_loss <- 10^finaldat_loss$predictions_loss
# Drop the two counties excluded from the per-county diagnostics
finaldat_loss <- subset(finaldat_loss, county != "Kootenai" & county != "Clearwater")
countyvector <- unique(finaldat_loss$county)
# BUG FIX: nrow was hard-coded to 24, but two counties are removed above,
# leaving 22 -- size the results frame from countyvector instead.
county_rmse_r2 <- data.frame(matrix(ncol = 3, nrow = length(countyvector)))
county_rmse_r2$county <- countyvector
colnames(county_rmse_r2) <- c("rmse", "r2", "nrmse", "county")
# Helper metrics, defined once rather than redefined on every iteration.
RMSE <- function(m, o) {
  sqrt(mean((m - o)^2))
}
MAE <- function(m, o) {
  mean(abs(m - o))
}
ii <- 0
for (i in countyvector) {
  ii <- ii + 1
  countyplot <- subset(finaldat_loss, county == i)
  # NRMSE is computed on observed years only (before zero-padding),
  # matching the original's use of the un-merged county subset.
  nrmse <- nrmse_func(countyplot$loss, countyplot$predictions_loss)
  # Pad to the full 2001-2015 span; years with no claims plot as 0.
  yearlist <- data.frame(year = 2001:2015)
  countyplot <- merge(countyplot, yearlist, by = "year", all = TRUE)
  countyplot$predictions_loss[is.na(countyplot$predictions_loss)] <- 0
  countyplot$loss[is.na(countyplot$loss)] <- 0
  # 2.1. Average of actual data
  avr_y_actual <- mean(countyplot$loss)
  # 2.2. Total sum of squares
  ss_total <- sum((countyplot$loss - avr_y_actual)^2)
  # 2.3. Regression sum of squares
  ss_regression <- sum((countyplot$predictions_loss - avr_y_actual)^2)
  # 2.4. Residual sum of squares
  ss_residuals <- sum((countyplot$loss - countyplot$predictions_loss)^2)
  # 3. R2 Score
  r2 <- round(1 - ss_residuals / ss_total, 2)
  rmse <- round(RMSE(countyplot$predictions_loss, countyplot$loss), 2)
  mae <- round(MAE(countyplot$predictions_loss, countyplot$loss), 2)
  county_rmse_r2[ii, 1] <- rmse
  county_rmse_r2[ii, 2] <- r2
  county_rmse_r2[ii, 3] <- nrmse
  # plot each county's loss, by year
  par(mar = c(6, 5.1, 2, 3), family = 'serif', mgp = c(3.5, .6, 0), las = 0)
  plot(c(countyplot$loss) ~ c(countyplot$year), cex.lab = 1.5, cex.axis = 1.5, col = "red",
       las = 2, yaxt = "n", xaxt = "n", xlab = "Year", ylab = "Wheat/Drought Insurance Loss",
       main = paste(i, " County, ", countyplot$state.1[1], " ", "R2 = ", r2, " RMSE = ", rmse, " NRMSE = ", nrmse, sep = ""))
  lines(countyplot$loss ~ countyplot$year, col = "red")
  points(countyplot$predictions_loss ~ countyplot$year, col = "blue")
  lines(countyplot$predictions_loss ~ countyplot$year, col = "blue")
  axis(1, countyplot$year, 2001:2015, col.axis = "black", las = 2, cex.axis = 1.5)
  # BUG FIX: derive the y-axis tick positions from the same pretty() call as
  # the "<n>M" labels (two separate pretty() calls can return vectors of
  # different lengths, mislabeling the axis).
  pts <- pretty(countyplot$loss / 1000000)
  pts2 <- pts * 1000000
  axis(2, las = 1, cex.axis = 1.3, at = pts2, labels = paste0(pts, "M"))
  legend("topleft", legend = c("historical", "predicted"),
         col = c("red", "blue"), lty = 1, cex = 1.3)
}